rope.rs

   1mod offset_utf16;
   2mod point;
   3mod point_utf16;
   4mod unclipped;
   5
   6use arrayvec::ArrayString;
   7use bromberg_sl2::{DigestString, HashMatrix};
   8use smallvec::SmallVec;
   9use std::{
  10    cmp, fmt, io, mem,
  11    ops::{AddAssign, Range},
  12    str,
  13};
  14use sum_tree::{Bias, Dimension, SumTree};
  15
  16pub use offset_utf16::OffsetUtf16;
  17pub use point::Point;
  18pub use point_utf16::PointUtf16;
  19pub use unclipped::Unclipped;
  20
  21#[cfg(test)]
  22const CHUNK_BASE: usize = 6;
  23
  24#[cfg(not(test))]
  25const CHUNK_BASE: usize = 16;
  26
  27#[derive(Clone, Default, Debug)]
  28pub struct Rope {
  29    chunks: SumTree<Chunk>,
  30}
  31
  32impl Rope {
  33    pub fn new() -> Self {
  34        Self::default()
  35    }
  36
  37    pub fn append(&mut self, rope: Rope) {
  38        let mut chunks = rope.chunks.cursor::<()>();
  39        chunks.next(&());
  40        if let Some(chunk) = chunks.item() {
  41            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  42                || chunk.0.len() < CHUNK_BASE
  43            {
  44                self.push(&chunk.0);
  45                chunks.next(&());
  46            }
  47        }
  48
  49        self.chunks.push_tree(chunks.suffix(&()), &());
  50        self.check_invariants();
  51    }
  52
  53    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  54        let mut new_rope = Rope::new();
  55        let mut cursor = self.cursor(0);
  56        new_rope.append(cursor.slice(range.start));
  57        cursor.seek_forward(range.end);
  58        new_rope.push(text);
  59        new_rope.append(cursor.suffix());
  60        *self = new_rope;
  61    }
  62
  63    pub fn slice(&self, range: Range<usize>) -> Rope {
  64        let mut cursor = self.cursor(0);
  65        cursor.seek_forward(range.start);
  66        cursor.slice(range.end)
  67    }
  68
  69    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
  70        //This would be more efficient with a forward advance after the first, but it's fine
  71        let start = self.point_to_offset(Point::new(range.start, 0));
  72        let end = self.point_to_offset(Point::new(range.end, 0));
  73        self.slice(start..end)
  74    }
  75
  76    pub fn push(&mut self, text: &str) {
  77        let mut new_chunks = SmallVec::<[_; 16]>::new();
  78        let mut new_chunk = ArrayString::new();
  79        for ch in text.chars() {
  80            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  81                new_chunks.push(Chunk(new_chunk));
  82                new_chunk = ArrayString::new();
  83            }
  84
  85            new_chunk.push(ch);
  86        }
  87        if !new_chunk.is_empty() {
  88            new_chunks.push(Chunk(new_chunk));
  89        }
  90
  91        let mut new_chunks = new_chunks.into_iter();
  92        let mut first_new_chunk = new_chunks.next();
  93        self.chunks.update_last(
  94            |last_chunk| {
  95                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
  96                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
  97                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
  98                    } else {
  99                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
 100                        text.push_str(&last_chunk.0);
 101                        text.push_str(&first_new_chunk_ref.0);
 102                        let (left, right) = text.split_at(find_split_ix(&text));
 103                        last_chunk.0.clear();
 104                        last_chunk.0.push_str(left);
 105                        first_new_chunk_ref.0.clear();
 106                        first_new_chunk_ref.0.push_str(right);
 107                    }
 108                }
 109            },
 110            &(),
 111        );
 112
 113        self.chunks
 114            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 115        self.check_invariants();
 116    }
 117
 118    pub fn push_front(&mut self, text: &str) {
 119        let suffix = mem::replace(self, Rope::from(text));
 120        self.append(suffix);
 121    }
 122
 123    fn check_invariants(&self) {
 124        #[cfg(test)]
 125        {
 126            // Ensure all chunks except maybe the last one are not underflowing.
 127            // Allow some wiggle room for multibyte characters at chunk boundaries.
 128            let mut chunks = self.chunks.cursor::<()>().peekable();
 129            while let Some(chunk) = chunks.next() {
 130                if chunks.peek().is_some() {
 131                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 132                }
 133            }
 134        }
 135    }
 136
 137    pub fn summary(&self) -> TextSummary {
 138        self.chunks.summary().text.clone()
 139    }
 140
 141    pub fn len(&self) -> usize {
 142        self.chunks.extent(&())
 143    }
 144
 145    pub fn is_empty(&self) -> bool {
 146        self.len() == 0
 147    }
 148
 149    pub fn max_point(&self) -> Point {
 150        self.chunks.extent(&())
 151    }
 152
 153    pub fn max_point_utf16(&self) -> PointUtf16 {
 154        self.chunks.extent(&())
 155    }
 156
 157    pub fn cursor(&self, offset: usize) -> Cursor {
 158        Cursor::new(self, offset)
 159    }
 160
 161    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 162        self.chars_at(0)
 163    }
 164
 165    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 166        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 167    }
 168
 169    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 170        self.reversed_chunks_in_range(0..start)
 171            .flat_map(|chunk| chunk.chars().rev())
 172    }
 173
 174    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 175        Bytes::new(self, range)
 176    }
 177
 178    pub fn chunks(&self) -> Chunks {
 179        self.chunks_in_range(0..self.len())
 180    }
 181
 182    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 183        Chunks::new(self, range, false)
 184    }
 185
 186    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 187        Chunks::new(self, range, true)
 188    }
 189
 190    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 191        if offset >= self.summary().len {
 192            return self.summary().len_utf16;
 193        }
 194        let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
 195        cursor.seek(&offset, Bias::Left, &());
 196        let overshoot = offset - cursor.start().0;
 197        cursor.start().1
 198            + cursor.item().map_or(Default::default(), |chunk| {
 199                chunk.offset_to_offset_utf16(overshoot)
 200            })
 201    }
 202
 203    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 204        if offset >= self.summary().len_utf16 {
 205            return self.summary().len;
 206        }
 207        let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
 208        cursor.seek(&offset, Bias::Left, &());
 209        let overshoot = offset - cursor.start().0;
 210        cursor.start().1
 211            + cursor.item().map_or(Default::default(), |chunk| {
 212                chunk.offset_utf16_to_offset(overshoot)
 213            })
 214    }
 215
 216    pub fn offset_to_point(&self, offset: usize) -> Point {
 217        if offset >= self.summary().len {
 218            return self.summary().lines;
 219        }
 220        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 221        cursor.seek(&offset, Bias::Left, &());
 222        let overshoot = offset - cursor.start().0;
 223        cursor.start().1
 224            + cursor
 225                .item()
 226                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 227    }
 228
 229    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 230        if offset >= self.summary().len {
 231            return self.summary().lines_utf16();
 232        }
 233        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 234        cursor.seek(&offset, Bias::Left, &());
 235        let overshoot = offset - cursor.start().0;
 236        cursor.start().1
 237            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 238                chunk.offset_to_point_utf16(overshoot)
 239            })
 240    }
 241
 242    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 243        if point >= self.summary().lines {
 244            return self.summary().lines_utf16();
 245        }
 246        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 247        cursor.seek(&point, Bias::Left, &());
 248        let overshoot = point - cursor.start().0;
 249        cursor.start().1
 250            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 251                chunk.point_to_point_utf16(overshoot)
 252            })
 253    }
 254
 255    pub fn point_to_offset(&self, point: Point) -> usize {
 256        if point >= self.summary().lines {
 257            return self.summary().len;
 258        }
 259        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 260        cursor.seek(&point, Bias::Left, &());
 261        let overshoot = point - cursor.start().0;
 262        cursor.start().1
 263            + cursor
 264                .item()
 265                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 266    }
 267
 268    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 269        self.point_utf16_to_offset_impl(point, false)
 270    }
 271
 272    pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
 273        self.point_utf16_to_offset_impl(point.0, true)
 274    }
 275
 276    fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
 277        if point >= self.summary().lines_utf16() {
 278            return self.summary().len;
 279        }
 280        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 281        cursor.seek(&point, Bias::Left, &());
 282        let overshoot = point - cursor.start().0;
 283        cursor.start().1
 284            + cursor
 285                .item()
 286                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clip))
 287    }
 288
 289    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 290        if point.0 >= self.summary().lines_utf16() {
 291            return self.summary().lines;
 292        }
 293        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 294        cursor.seek(&point.0, Bias::Left, &());
 295        let overshoot = Unclipped(point.0 - cursor.start().0);
 296        cursor.start().1
 297            + cursor.item().map_or(Point::zero(), |chunk| {
 298                chunk.unclipped_point_utf16_to_point(overshoot)
 299            })
 300    }
 301
 302    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 303        let mut cursor = self.chunks.cursor::<usize>();
 304        cursor.seek(&offset, Bias::Left, &());
 305        if let Some(chunk) = cursor.item() {
 306            let mut ix = offset - cursor.start();
 307            while !chunk.0.is_char_boundary(ix) {
 308                match bias {
 309                    Bias::Left => {
 310                        ix -= 1;
 311                        offset -= 1;
 312                    }
 313                    Bias::Right => {
 314                        ix += 1;
 315                        offset += 1;
 316                    }
 317                }
 318            }
 319            offset
 320        } else {
 321            self.summary().len
 322        }
 323    }
 324
 325    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 326        let mut cursor = self.chunks.cursor::<OffsetUtf16>();
 327        cursor.seek(&offset, Bias::Right, &());
 328        if let Some(chunk) = cursor.item() {
 329            let overshoot = offset - cursor.start();
 330            *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
 331        } else {
 332            self.summary().len_utf16
 333        }
 334    }
 335
 336    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 337        let mut cursor = self.chunks.cursor::<Point>();
 338        cursor.seek(&point, Bias::Right, &());
 339        if let Some(chunk) = cursor.item() {
 340            let overshoot = point - cursor.start();
 341            *cursor.start() + chunk.clip_point(overshoot, bias)
 342        } else {
 343            self.summary().lines
 344        }
 345    }
 346
 347    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 348        let mut cursor = self.chunks.cursor::<PointUtf16>();
 349        cursor.seek(&point.0, Bias::Right, &());
 350        if let Some(chunk) = cursor.item() {
 351            let overshoot = Unclipped(point.0 - cursor.start());
 352            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 353        } else {
 354            self.summary().lines_utf16()
 355        }
 356    }
 357
 358    pub fn line_len(&self, row: u32) -> u32 {
 359        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 360            .column
 361    }
 362
 363    pub fn fingerprint(&self) -> String {
 364        self.chunks.summary().fingerprint.to_hex()
 365    }
 366}
 367
 368impl<'a> From<&'a str> for Rope {
 369    fn from(text: &'a str) -> Self {
 370        let mut rope = Self::new();
 371        rope.push(text);
 372        rope
 373    }
 374}
 375
 376impl fmt::Display for Rope {
 377    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 378        for chunk in self.chunks() {
 379            write!(f, "{}", chunk)?;
 380        }
 381        Ok(())
 382    }
 383}
 384
 385pub struct Cursor<'a> {
 386    rope: &'a Rope,
 387    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 388    offset: usize,
 389}
 390
 391impl<'a> Cursor<'a> {
 392    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 393        let mut chunks = rope.chunks.cursor();
 394        chunks.seek(&offset, Bias::Right, &());
 395        Self {
 396            rope,
 397            chunks,
 398            offset,
 399        }
 400    }
 401
 402    pub fn seek_forward(&mut self, end_offset: usize) {
 403        debug_assert!(end_offset >= self.offset);
 404
 405        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 406        self.offset = end_offset;
 407    }
 408
 409    pub fn slice(&mut self, end_offset: usize) -> Rope {
 410        debug_assert!(
 411            end_offset >= self.offset,
 412            "cannot slice backwards from {} to {}",
 413            self.offset,
 414            end_offset
 415        );
 416
 417        let mut slice = Rope::new();
 418        if let Some(start_chunk) = self.chunks.item() {
 419            let start_ix = self.offset - self.chunks.start();
 420            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 421            slice.push(&start_chunk.0[start_ix..end_ix]);
 422        }
 423
 424        if end_offset > self.chunks.end(&()) {
 425            self.chunks.next(&());
 426            slice.append(Rope {
 427                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 428            });
 429            if let Some(end_chunk) = self.chunks.item() {
 430                let end_ix = end_offset - self.chunks.start();
 431                slice.push(&end_chunk.0[..end_ix]);
 432            }
 433        }
 434
 435        self.offset = end_offset;
 436        slice
 437    }
 438
 439    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 440        debug_assert!(end_offset >= self.offset);
 441
 442        let mut summary = D::default();
 443        if let Some(start_chunk) = self.chunks.item() {
 444            let start_ix = self.offset - self.chunks.start();
 445            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 446            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 447                &start_chunk.0[start_ix..end_ix],
 448            )));
 449        }
 450
 451        if end_offset > self.chunks.end(&()) {
 452            self.chunks.next(&());
 453            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 454            if let Some(end_chunk) = self.chunks.item() {
 455                let end_ix = end_offset - self.chunks.start();
 456                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 457                    &end_chunk.0[..end_ix],
 458                )));
 459            }
 460        }
 461
 462        self.offset = end_offset;
 463        summary
 464    }
 465
 466    pub fn suffix(mut self) -> Rope {
 467        self.slice(self.rope.chunks.extent(&()))
 468    }
 469
 470    pub fn offset(&self) -> usize {
 471        self.offset
 472    }
 473}
 474
 475pub struct Chunks<'a> {
 476    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 477    range: Range<usize>,
 478    reversed: bool,
 479}
 480
 481impl<'a> Chunks<'a> {
 482    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 483        let mut chunks = rope.chunks.cursor();
 484        if reversed {
 485            chunks.seek(&range.end, Bias::Left, &());
 486        } else {
 487            chunks.seek(&range.start, Bias::Right, &());
 488        }
 489        Self {
 490            chunks,
 491            range,
 492            reversed,
 493        }
 494    }
 495
 496    pub fn offset(&self) -> usize {
 497        if self.reversed {
 498            self.range.end.min(self.chunks.end(&()))
 499        } else {
 500            self.range.start.max(*self.chunks.start())
 501        }
 502    }
 503
 504    pub fn seek(&mut self, offset: usize) {
 505        let bias = if self.reversed {
 506            Bias::Left
 507        } else {
 508            Bias::Right
 509        };
 510
 511        if offset >= self.chunks.end(&()) {
 512            self.chunks.seek_forward(&offset, bias, &());
 513        } else {
 514            self.chunks.seek(&offset, bias, &());
 515        }
 516
 517        if self.reversed {
 518            self.range.end = offset;
 519        } else {
 520            self.range.start = offset;
 521        }
 522    }
 523
 524    pub fn peek(&self) -> Option<&'a str> {
 525        let chunk = self.chunks.item()?;
 526        if self.reversed && self.range.start >= self.chunks.end(&()) {
 527            return None;
 528        }
 529        let chunk_start = *self.chunks.start();
 530        if self.range.end <= chunk_start {
 531            return None;
 532        }
 533
 534        let start = self.range.start.saturating_sub(chunk_start);
 535        let end = self.range.end - chunk_start;
 536        Some(&chunk.0[start..chunk.0.len().min(end)])
 537    }
 538}
 539
 540impl<'a> Iterator for Chunks<'a> {
 541    type Item = &'a str;
 542
 543    fn next(&mut self) -> Option<Self::Item> {
 544        let result = self.peek();
 545        if result.is_some() {
 546            if self.reversed {
 547                self.chunks.prev(&());
 548            } else {
 549                self.chunks.next(&());
 550            }
 551        }
 552        result
 553    }
 554}
 555
 556pub struct Bytes<'a> {
 557    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 558    range: Range<usize>,
 559}
 560
 561impl<'a> Bytes<'a> {
 562    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
 563        let mut chunks = rope.chunks.cursor();
 564        chunks.seek(&range.start, Bias::Right, &());
 565        Self { chunks, range }
 566    }
 567
 568    pub fn peek(&self) -> Option<&'a [u8]> {
 569        let chunk = self.chunks.item()?;
 570        let chunk_start = *self.chunks.start();
 571        if self.range.end <= chunk_start {
 572            return None;
 573        }
 574
 575        let start = self.range.start.saturating_sub(chunk_start);
 576        let end = self.range.end - chunk_start;
 577        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 578    }
 579}
 580
 581impl<'a> Iterator for Bytes<'a> {
 582    type Item = &'a [u8];
 583
 584    fn next(&mut self) -> Option<Self::Item> {
 585        let result = self.peek();
 586        if result.is_some() {
 587            self.chunks.next(&());
 588        }
 589        result
 590    }
 591}
 592
 593impl<'a> io::Read for Bytes<'a> {
 594    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 595        if let Some(chunk) = self.peek() {
 596            let len = cmp::min(buf.len(), chunk.len());
 597            buf[..len].copy_from_slice(&chunk[..len]);
 598            self.range.start += len;
 599            if len == chunk.len() {
 600                self.chunks.next(&());
 601            }
 602            Ok(len)
 603        } else {
 604            Ok(0)
 605        }
 606    }
 607}
 608
 609#[derive(Clone, Debug, Default)]
 610struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 611
 612impl Chunk {
 613    fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
 614        let mut offset = 0;
 615        let mut offset_utf16 = OffsetUtf16(0);
 616        for ch in self.0.chars() {
 617            if offset >= target {
 618                break;
 619            }
 620
 621            offset += ch.len_utf8();
 622            offset_utf16.0 += ch.len_utf16();
 623        }
 624        offset_utf16
 625    }
 626
 627    fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 628        let mut offset_utf16 = OffsetUtf16(0);
 629        let mut offset = 0;
 630        for ch in self.0.chars() {
 631            if offset_utf16 >= target {
 632                break;
 633            }
 634
 635            offset += ch.len_utf8();
 636            offset_utf16.0 += ch.len_utf16();
 637        }
 638        offset
 639    }
 640
 641    fn offset_to_point(&self, target: usize) -> Point {
 642        let mut offset = 0;
 643        let mut point = Point::new(0, 0);
 644        for ch in self.0.chars() {
 645            if offset >= target {
 646                break;
 647            }
 648
 649            if ch == '\n' {
 650                point.row += 1;
 651                point.column = 0;
 652            } else {
 653                point.column += ch.len_utf8() as u32;
 654            }
 655            offset += ch.len_utf8();
 656        }
 657        point
 658    }
 659
 660    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 661        let mut offset = 0;
 662        let mut point = PointUtf16::new(0, 0);
 663        for ch in self.0.chars() {
 664            if offset >= target {
 665                break;
 666            }
 667
 668            if ch == '\n' {
 669                point.row += 1;
 670                point.column = 0;
 671            } else {
 672                point.column += ch.len_utf16() as u32;
 673            }
 674            offset += ch.len_utf8();
 675        }
 676        point
 677    }
 678
 679    fn point_to_offset(&self, target: Point) -> usize {
 680        let mut offset = 0;
 681        let mut point = Point::new(0, 0);
 682        for ch in self.0.chars() {
 683            if point >= target {
 684                if point > target {
 685                    panic!("point {:?} is inside of character {:?}", target, ch);
 686                }
 687                break;
 688            }
 689
 690            if ch == '\n' {
 691                point.row += 1;
 692                if point.row > target.row {
 693                    panic!(
 694                        "point {:?} is beyond the end of a line with length {}",
 695                        target, point.column
 696                    );
 697                }
 698                point.column = 0;
 699            } else {
 700                point.column += ch.len_utf8() as u32;
 701            }
 702            offset += ch.len_utf8();
 703        }
 704        offset
 705    }
 706
 707    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 708        let mut point = Point::zero();
 709        let mut point_utf16 = PointUtf16::new(0, 0);
 710        for ch in self.0.chars() {
 711            if point >= target {
 712                break;
 713            }
 714
 715            if ch == '\n' {
 716                point_utf16.row += 1;
 717                point_utf16.column = 0;
 718                point.row += 1;
 719                point.column = 0;
 720            } else {
 721                point_utf16.column += ch.len_utf16() as u32;
 722                point.column += ch.len_utf8() as u32;
 723            }
 724        }
 725        point_utf16
 726    }
 727
 728    fn point_utf16_to_offset(&self, target: PointUtf16, clip: bool) -> usize {
 729        let mut offset = 0;
 730        let mut point = PointUtf16::new(0, 0);
 731
 732        for ch in self.0.chars() {
 733            if point == target {
 734                break;
 735            }
 736
 737            if ch == '\n' {
 738                point.row += 1;
 739                point.column = 0;
 740                if point.row > target.row {
 741                    if clip {
 742                        // Return the offset of the newline
 743                        return offset;
 744                    }
 745                    panic!(
 746                        "point {:?} is beyond the end of a line with length {}",
 747                        target, point.column
 748                    );
 749                }
 750            } else {
 751                point.column += ch.len_utf16() as u32;
 752            }
 753
 754            if point > target {
 755                if clip {
 756                    // Return the offset of the codepoint which we have landed within, bias left
 757                    return offset;
 758                }
 759                panic!("point {:?} is inside of codepoint {:?}", target, ch);
 760            }
 761
 762            offset += ch.len_utf8();
 763        }
 764
 765        offset
 766    }
 767
 768    fn unclipped_point_utf16_to_point(&self, target: Unclipped<PointUtf16>) -> Point {
 769        let mut point = Point::zero();
 770        let mut point_utf16 = PointUtf16::zero();
 771
 772        for ch in self.0.chars() {
 773            if point_utf16 == target.0 {
 774                break;
 775            }
 776
 777            if point_utf16 > target.0 {
 778                // If the point is past the end of a line or inside of a code point,
 779                // return the last valid point before the target.
 780                return point;
 781            }
 782
 783            if ch == '\n' {
 784                point_utf16 += PointUtf16::new(1, 0);
 785                point += Point::new(1, 0);
 786            } else {
 787                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 788                point += Point::new(0, ch.len_utf8() as u32);
 789            }
 790        }
 791
 792        point
 793    }
 794
 795    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 796        for (row, line) in self.0.split('\n').enumerate() {
 797            if row == target.row as usize {
 798                let mut column = target.column.min(line.len() as u32);
 799                while !line.is_char_boundary(column as usize) {
 800                    match bias {
 801                        Bias::Left => column -= 1,
 802                        Bias::Right => column += 1,
 803                    }
 804                }
 805                return Point::new(row as u32, column);
 806            }
 807        }
 808        unreachable!()
 809    }
 810
 811    fn clip_point_utf16(&self, target: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 812        for (row, line) in self.0.split('\n').enumerate() {
 813            if row == target.0.row as usize {
 814                let mut code_units = line.encode_utf16();
 815                let mut column = code_units.by_ref().take(target.0.column as usize).count();
 816                if char::decode_utf16(code_units).next().transpose().is_err() {
 817                    match bias {
 818                        Bias::Left => column -= 1,
 819                        Bias::Right => column += 1,
 820                    }
 821                }
 822                return PointUtf16::new(row as u32, column as u32);
 823            }
 824        }
 825        unreachable!()
 826    }
 827
 828    fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 829        let mut code_units = self.0.encode_utf16();
 830        let mut offset = code_units.by_ref().take(target.0 as usize).count();
 831        if char::decode_utf16(code_units).next().transpose().is_err() {
 832            match bias {
 833                Bias::Left => offset -= 1,
 834                Bias::Right => offset += 1,
 835            }
 836        }
 837        OffsetUtf16(offset)
 838    }
 839}
 840
 841impl sum_tree::Item for Chunk {
 842    type Summary = ChunkSummary;
 843
 844    fn summary(&self) -> Self::Summary {
 845        ChunkSummary::from(self.0.as_str())
 846    }
 847}
 848
 849#[derive(Clone, Debug, Default, Eq, PartialEq)]
 850pub struct ChunkSummary {
 851    text: TextSummary,
 852    fingerprint: HashMatrix,
 853}
 854
 855impl<'a> From<&'a str> for ChunkSummary {
 856    fn from(text: &'a str) -> Self {
 857        Self {
 858            text: TextSummary::from(text),
 859            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
 860        }
 861    }
 862}
 863
 864impl sum_tree::Summary for ChunkSummary {
 865    type Context = ();
 866
 867    fn add_summary(&mut self, summary: &Self, _: &()) {
 868        self.text += &summary.text;
 869        self.fingerprint = self.fingerprint * summary.fingerprint;
 870    }
 871}
 872
 873#[derive(Clone, Debug, Default, Eq, PartialEq)]
 874pub struct TextSummary {
 875    pub len: usize,
 876    pub len_utf16: OffsetUtf16,
 877    pub lines: Point,
 878    pub first_line_chars: u32,
 879    pub last_line_chars: u32,
 880    pub last_line_len_utf16: u32,
 881    pub longest_row: u32,
 882    pub longest_row_chars: u32,
 883}
 884
 885impl TextSummary {
 886    pub fn lines_utf16(&self) -> PointUtf16 {
 887        PointUtf16 {
 888            row: self.lines.row,
 889            column: self.last_line_len_utf16,
 890        }
 891    }
 892}
 893
 894impl<'a> From<&'a str> for TextSummary {
 895    fn from(text: &'a str) -> Self {
 896        let mut len_utf16 = OffsetUtf16(0);
 897        let mut lines = Point::new(0, 0);
 898        let mut first_line_chars = 0;
 899        let mut last_line_chars = 0;
 900        let mut last_line_len_utf16 = 0;
 901        let mut longest_row = 0;
 902        let mut longest_row_chars = 0;
 903        for c in text.chars() {
 904            len_utf16.0 += c.len_utf16();
 905
 906            if c == '\n' {
 907                lines += Point::new(1, 0);
 908                last_line_len_utf16 = 0;
 909                last_line_chars = 0;
 910            } else {
 911                lines.column += c.len_utf8() as u32;
 912                last_line_len_utf16 += c.len_utf16() as u32;
 913                last_line_chars += 1;
 914            }
 915
 916            if lines.row == 0 {
 917                first_line_chars = last_line_chars;
 918            }
 919
 920            if last_line_chars > longest_row_chars {
 921                longest_row = lines.row;
 922                longest_row_chars = last_line_chars;
 923            }
 924        }
 925
 926        TextSummary {
 927            len: text.len(),
 928            len_utf16,
 929            lines,
 930            first_line_chars,
 931            last_line_chars,
 932            last_line_len_utf16,
 933            longest_row,
 934            longest_row_chars,
 935        }
 936    }
 937}
 938
 939impl sum_tree::Summary for TextSummary {
 940    type Context = ();
 941
 942    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
 943        *self += summary;
 944    }
 945}
 946
 947impl std::ops::Add<Self> for TextSummary {
 948    type Output = Self;
 949
 950    fn add(mut self, rhs: Self) -> Self::Output {
 951        AddAssign::add_assign(&mut self, &rhs);
 952        self
 953    }
 954}
 955
 956impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 957    fn add_assign(&mut self, other: &'a Self) {
 958        let joined_chars = self.last_line_chars + other.first_line_chars;
 959        if joined_chars > self.longest_row_chars {
 960            self.longest_row = self.lines.row;
 961            self.longest_row_chars = joined_chars;
 962        }
 963        if other.longest_row_chars > self.longest_row_chars {
 964            self.longest_row = self.lines.row + other.longest_row;
 965            self.longest_row_chars = other.longest_row_chars;
 966        }
 967
 968        if self.lines.row == 0 {
 969            self.first_line_chars += other.first_line_chars;
 970        }
 971
 972        if other.lines.row == 0 {
 973            self.last_line_chars += other.first_line_chars;
 974            self.last_line_len_utf16 += other.last_line_len_utf16;
 975        } else {
 976            self.last_line_chars = other.last_line_chars;
 977            self.last_line_len_utf16 = other.last_line_len_utf16;
 978        }
 979
 980        self.len += other.len;
 981        self.len_utf16 += other.len_utf16;
 982        self.lines += other.lines;
 983    }
 984}
 985
 986impl std::ops::AddAssign<Self> for TextSummary {
 987    fn add_assign(&mut self, other: Self) {
 988        *self += &other;
 989    }
 990}
 991
 992pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
 993    fn from_text_summary(summary: &TextSummary) -> Self;
 994    fn add_assign(&mut self, other: &Self);
 995}
 996
 997impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
 998    fn from_text_summary(summary: &TextSummary) -> Self {
 999        (
1000            D1::from_text_summary(summary),
1001            D2::from_text_summary(summary),
1002        )
1003    }
1004
1005    fn add_assign(&mut self, other: &Self) {
1006        self.0.add_assign(&other.0);
1007        self.1.add_assign(&other.1);
1008    }
1009}
1010
1011impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1012    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1013        *self += &summary.text;
1014    }
1015}
1016
1017impl TextDimension for TextSummary {
1018    fn from_text_summary(summary: &TextSummary) -> Self {
1019        summary.clone()
1020    }
1021
1022    fn add_assign(&mut self, other: &Self) {
1023        *self += other;
1024    }
1025}
1026
1027impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1028    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1029        *self += summary.text.len;
1030    }
1031}
1032
1033impl TextDimension for usize {
1034    fn from_text_summary(summary: &TextSummary) -> Self {
1035        summary.len
1036    }
1037
1038    fn add_assign(&mut self, other: &Self) {
1039        *self += other;
1040    }
1041}
1042
1043impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1044    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1045        *self += summary.text.len_utf16;
1046    }
1047}
1048
1049impl TextDimension for OffsetUtf16 {
1050    fn from_text_summary(summary: &TextSummary) -> Self {
1051        summary.len_utf16
1052    }
1053
1054    fn add_assign(&mut self, other: &Self) {
1055        *self += other;
1056    }
1057}
1058
1059impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1060    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1061        *self += summary.text.lines;
1062    }
1063}
1064
1065impl TextDimension for Point {
1066    fn from_text_summary(summary: &TextSummary) -> Self {
1067        summary.lines
1068    }
1069
1070    fn add_assign(&mut self, other: &Self) {
1071        *self += other;
1072    }
1073}
1074
1075impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1076    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1077        *self += summary.text.lines_utf16();
1078    }
1079}
1080
1081impl TextDimension for PointUtf16 {
1082    fn from_text_summary(summary: &TextSummary) -> Self {
1083        summary.lines_utf16()
1084    }
1085
1086    fn add_assign(&mut self, other: &Self) {
1087        *self += other;
1088    }
1089}
1090
1091fn find_split_ix(text: &str) -> usize {
1092    let mut ix = text.len() / 2;
1093    while !text.is_char_boundary(ix) {
1094        if ix < 2 * CHUNK_BASE {
1095            ix += 1;
1096        } else {
1097            ix = (text.len() / 2) - 1;
1098            break;
1099        }
1100    }
1101    while !text.is_char_boundary(ix) {
1102        ix -= 1;
1103    }
1104
1105    debug_assert!(ix <= 2 * CHUNK_BASE);
1106    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
1107    ix
1108}
1109
1110#[cfg(test)]
1111mod tests {
1112    use super::*;
1113    use rand::prelude::*;
1114    use std::{cmp::Ordering, env, io::Read};
1115    use util::RandomCharIter;
1116    use Bias::{Left, Right};
1117
1118    #[test]
1119    fn test_all_4_byte_chars() {
1120        let mut rope = Rope::new();
1121        let text = "🏀".repeat(256);
1122        rope.push(&text);
1123        assert_eq!(rope.text(), text);
1124    }
1125
1126    #[test]
1127    fn test_clip() {
1128        let rope = Rope::from("🧘");
1129
1130        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1131        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1132        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1133
1134        assert_eq!(
1135            rope.clip_point(Point::new(0, 1), Bias::Left),
1136            Point::new(0, 0)
1137        );
1138        assert_eq!(
1139            rope.clip_point(Point::new(0, 1), Bias::Right),
1140            Point::new(0, 4)
1141        );
1142        assert_eq!(
1143            rope.clip_point(Point::new(0, 5), Bias::Right),
1144            Point::new(0, 4)
1145        );
1146
1147        assert_eq!(
1148            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1149            PointUtf16::new(0, 0)
1150        );
1151        assert_eq!(
1152            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1153            PointUtf16::new(0, 2)
1154        );
1155        assert_eq!(
1156            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1157            PointUtf16::new(0, 2)
1158        );
1159
1160        assert_eq!(
1161            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1162            OffsetUtf16(0)
1163        );
1164        assert_eq!(
1165            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1166            OffsetUtf16(2)
1167        );
1168        assert_eq!(
1169            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1170            OffsetUtf16(2)
1171        );
1172    }
1173
1174    #[gpui::test(iterations = 100)]
1175    fn test_random_rope(mut rng: StdRng) {
1176        let operations = env::var("OPERATIONS")
1177            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1178            .unwrap_or(10);
1179
1180        let mut expected = String::new();
1181        let mut actual = Rope::new();
1182        for _ in 0..operations {
1183            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1184            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1185            let len = rng.gen_range(0..=64);
1186            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1187
1188            let mut new_actual = Rope::new();
1189            let mut cursor = actual.cursor(0);
1190            new_actual.append(cursor.slice(start_ix));
1191            new_actual.push(&new_text);
1192            cursor.seek_forward(end_ix);
1193            new_actual.append(cursor.suffix());
1194            actual = new_actual;
1195
1196            expected.replace_range(start_ix..end_ix, &new_text);
1197
1198            assert_eq!(actual.text(), expected);
1199            log::info!("text: {:?}", expected);
1200
1201            for _ in 0..5 {
1202                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1203                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1204
1205                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1206                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1207
1208                let mut actual_text = String::new();
1209                actual
1210                    .bytes_in_range(start_ix..end_ix)
1211                    .read_to_string(&mut actual_text)
1212                    .unwrap();
1213                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1214
1215                assert_eq!(
1216                    actual
1217                        .reversed_chunks_in_range(start_ix..end_ix)
1218                        .collect::<Vec<&str>>()
1219                        .into_iter()
1220                        .rev()
1221                        .collect::<String>(),
1222                    &expected[start_ix..end_ix]
1223                );
1224            }
1225
1226            let mut offset_utf16 = OffsetUtf16(0);
1227            let mut point = Point::new(0, 0);
1228            let mut point_utf16 = PointUtf16::new(0, 0);
1229            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1230                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1231                assert_eq!(
1232                    actual.offset_to_point_utf16(ix),
1233                    point_utf16,
1234                    "offset_to_point_utf16({})",
1235                    ix
1236                );
1237                assert_eq!(
1238                    actual.point_to_offset(point),
1239                    ix,
1240                    "point_to_offset({:?})",
1241                    point
1242                );
1243                assert_eq!(
1244                    actual.point_utf16_to_offset(point_utf16),
1245                    ix,
1246                    "point_utf16_to_offset({:?})",
1247                    point_utf16
1248                );
1249                assert_eq!(
1250                    actual.offset_to_offset_utf16(ix),
1251                    offset_utf16,
1252                    "offset_to_offset_utf16({:?})",
1253                    ix
1254                );
1255                assert_eq!(
1256                    actual.offset_utf16_to_offset(offset_utf16),
1257                    ix,
1258                    "offset_utf16_to_offset({:?})",
1259                    offset_utf16
1260                );
1261                if ch == '\n' {
1262                    point += Point::new(1, 0);
1263                    point_utf16 += PointUtf16::new(1, 0);
1264                } else {
1265                    point.column += ch.len_utf8() as u32;
1266                    point_utf16.column += ch.len_utf16() as u32;
1267                }
1268                offset_utf16.0 += ch.len_utf16();
1269            }
1270
1271            let mut offset_utf16 = OffsetUtf16(0);
1272            let mut point_utf16 = Unclipped(PointUtf16::zero());
1273            for unit in expected.encode_utf16() {
1274                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1275                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1276                assert!(right_offset >= left_offset);
1277                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1278                actual.offset_utf16_to_offset(left_offset);
1279                actual.offset_utf16_to_offset(right_offset);
1280
1281                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1282                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1283                assert!(right_point >= left_point);
1284                // Ensure translating valid UTF-16 points to offsets doesn't panic.
1285                actual.point_utf16_to_offset(left_point);
1286                actual.point_utf16_to_offset(right_point);
1287
1288                offset_utf16.0 += 1;
1289                if unit == b'\n' as u16 {
1290                    point_utf16.0 += PointUtf16::new(1, 0);
1291                } else {
1292                    point_utf16.0 += PointUtf16::new(0, 1);
1293                }
1294            }
1295
1296            for _ in 0..5 {
1297                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1298                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1299                assert_eq!(
1300                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1301                    TextSummary::from(&expected[start_ix..end_ix])
1302                );
1303            }
1304
1305            let mut expected_longest_rows = Vec::new();
1306            let mut longest_line_len = -1_isize;
1307            for (row, line) in expected.split('\n').enumerate() {
1308                let row = row as u32;
1309                assert_eq!(
1310                    actual.line_len(row),
1311                    line.len() as u32,
1312                    "invalid line len for row {}",
1313                    row
1314                );
1315
1316                let line_char_count = line.chars().count() as isize;
1317                match line_char_count.cmp(&longest_line_len) {
1318                    Ordering::Less => {}
1319                    Ordering::Equal => expected_longest_rows.push(row),
1320                    Ordering::Greater => {
1321                        longest_line_len = line_char_count;
1322                        expected_longest_rows.clear();
1323                        expected_longest_rows.push(row);
1324                    }
1325                }
1326            }
1327
1328            let longest_row = actual.summary().longest_row;
1329            assert!(
1330                expected_longest_rows.contains(&longest_row),
1331                "incorrect longest row {}. expected {:?} with length {}",
1332                longest_row,
1333                expected_longest_rows,
1334                longest_line_len,
1335            );
1336        }
1337    }
1338
1339    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1340        while !text.is_char_boundary(offset) {
1341            match bias {
1342                Bias::Left => offset -= 1,
1343                Bias::Right => offset += 1,
1344            }
1345        }
1346        offset
1347    }
1348
1349    impl Rope {
1350        fn text(&self) -> String {
1351            let mut text = String::new();
1352            for chunk in self.chunks.cursor::<()>() {
1353                text.push_str(&chunk.0);
1354            }
1355            text
1356        }
1357    }
1358}