rope.rs

   1mod offset_utf16;
   2mod point;
   3mod point_utf16;
   4mod unclipped;
   5
   6use arrayvec::ArrayString;
   7use bromberg_sl2::HashMatrix;
   8use smallvec::SmallVec;
   9use std::{
  10    cmp, fmt, io, mem,
  11    ops::{AddAssign, Range},
  12    str,
  13};
  14use sum_tree::{Bias, Dimension, SumTree};
  15use util::debug_panic;
  16
  17pub use offset_utf16::OffsetUtf16;
  18pub use point::Point;
  19pub use point_utf16::PointUtf16;
  20pub use unclipped::Unclipped;
  21
  22#[cfg(test)]
  23const CHUNK_BASE: usize = 6;
  24
  25#[cfg(not(test))]
  26const CHUNK_BASE: usize = 16;
  27
  28/// Type alias to [HashMatrix], an implementation of a homomorphic hash function. Two [Rope] instances
  29/// containing the same text will produce the same fingerprint. This hash function is special in that
  30/// it allows us to hash individual chunks and aggregate them up the [Rope]'s tree, with the resulting
  31/// hash being equivalent to hashing all the text contained in the [Rope] at once.
  32pub type RopeFingerprint = HashMatrix;
  33
  34#[derive(Clone, Default)]
  35pub struct Rope {
  36    chunks: SumTree<Chunk>,
  37}
  38
  39impl Rope {
  40    pub fn new() -> Self {
  41        Self::default()
  42    }
  43
  44    pub fn text_fingerprint(text: &str) -> RopeFingerprint {
  45        bromberg_sl2::hash_strict(text.as_bytes())
  46    }
  47
  48    pub fn append(&mut self, rope: Rope) {
  49        let mut chunks = rope.chunks.cursor::<()>();
  50        chunks.next(&());
  51        if let Some(chunk) = chunks.item() {
  52            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  53                || chunk.0.len() < CHUNK_BASE
  54            {
  55                self.push(&chunk.0);
  56                chunks.next(&());
  57            }
  58        }
  59
  60        self.chunks.append(chunks.suffix(&()), &());
  61        self.check_invariants();
  62    }
  63
  64    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  65        let mut new_rope = Rope::new();
  66        let mut cursor = self.cursor(0);
  67        new_rope.append(cursor.slice(range.start));
  68        cursor.seek_forward(range.end);
  69        new_rope.push(text);
  70        new_rope.append(cursor.suffix());
  71        *self = new_rope;
  72    }
  73
  74    pub fn slice(&self, range: Range<usize>) -> Rope {
  75        let mut cursor = self.cursor(0);
  76        cursor.seek_forward(range.start);
  77        cursor.slice(range.end)
  78    }
  79
  80    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
  81        //This would be more efficient with a forward advance after the first, but it's fine
  82        let start = self.point_to_offset(Point::new(range.start, 0));
  83        let end = self.point_to_offset(Point::new(range.end, 0));
  84        self.slice(start..end)
  85    }
  86
  87    pub fn push(&mut self, text: &str) {
  88        let mut new_chunks = SmallVec::<[_; 16]>::new();
  89        let mut new_chunk = ArrayString::new();
  90        for ch in text.chars() {
  91            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  92                new_chunks.push(Chunk(new_chunk));
  93                new_chunk = ArrayString::new();
  94            }
  95
  96            new_chunk.push(ch);
  97        }
  98        if !new_chunk.is_empty() {
  99            new_chunks.push(Chunk(new_chunk));
 100        }
 101
 102        let mut new_chunks = new_chunks.into_iter();
 103        let mut first_new_chunk = new_chunks.next();
 104        self.chunks.update_last(
 105            |last_chunk| {
 106                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
 107                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
 108                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
 109                    } else {
 110                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
 111                        text.push_str(&last_chunk.0);
 112                        text.push_str(&first_new_chunk_ref.0);
 113                        let (left, right) = text.split_at(find_split_ix(&text));
 114                        last_chunk.0.clear();
 115                        last_chunk.0.push_str(left);
 116                        first_new_chunk_ref.0.clear();
 117                        first_new_chunk_ref.0.push_str(right);
 118                    }
 119                }
 120            },
 121            &(),
 122        );
 123
 124        self.chunks
 125            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
 126        self.check_invariants();
 127    }
 128
 129    pub fn push_front(&mut self, text: &str) {
 130        let suffix = mem::replace(self, Rope::from(text));
 131        self.append(suffix);
 132    }
 133
 134    fn check_invariants(&self) {
 135        #[cfg(test)]
 136        {
 137            // Ensure all chunks except maybe the last one are not underflowing.
 138            // Allow some wiggle room for multibyte characters at chunk boundaries.
 139            let mut chunks = self.chunks.cursor::<()>().peekable();
 140            while let Some(chunk) = chunks.next() {
 141                if chunks.peek().is_some() {
 142                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 143                }
 144            }
 145        }
 146    }
 147
 148    pub fn summary(&self) -> TextSummary {
 149        self.chunks.summary().text.clone()
 150    }
 151
 152    pub fn len(&self) -> usize {
 153        self.chunks.extent(&())
 154    }
 155
 156    pub fn is_empty(&self) -> bool {
 157        self.len() == 0
 158    }
 159
 160    pub fn max_point(&self) -> Point {
 161        self.chunks.extent(&())
 162    }
 163
 164    pub fn max_point_utf16(&self) -> PointUtf16 {
 165        self.chunks.extent(&())
 166    }
 167
 168    pub fn cursor(&self, offset: usize) -> Cursor {
 169        Cursor::new(self, offset)
 170    }
 171
 172    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 173        self.chars_at(0)
 174    }
 175
 176    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 177        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 178    }
 179
 180    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 181        self.reversed_chunks_in_range(0..start)
 182            .flat_map(|chunk| chunk.chars().rev())
 183    }
 184
 185    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 186        Bytes::new(self, range, false)
 187    }
 188
 189    pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes {
 190        Bytes::new(self, range, true)
 191    }
 192
 193    pub fn chunks(&self) -> Chunks {
 194        self.chunks_in_range(0..self.len())
 195    }
 196
 197    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 198        Chunks::new(self, range, false)
 199    }
 200
 201    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 202        Chunks::new(self, range, true)
 203    }
 204
 205    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 206        if offset >= self.summary().len {
 207            return self.summary().len_utf16;
 208        }
 209        let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
 210        cursor.seek(&offset, Bias::Left, &());
 211        let overshoot = offset - cursor.start().0;
 212        cursor.start().1
 213            + cursor.item().map_or(Default::default(), |chunk| {
 214                chunk.offset_to_offset_utf16(overshoot)
 215            })
 216    }
 217
 218    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 219        if offset >= self.summary().len_utf16 {
 220            return self.summary().len;
 221        }
 222        let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
 223        cursor.seek(&offset, Bias::Left, &());
 224        let overshoot = offset - cursor.start().0;
 225        cursor.start().1
 226            + cursor.item().map_or(Default::default(), |chunk| {
 227                chunk.offset_utf16_to_offset(overshoot)
 228            })
 229    }
 230
 231    pub fn offset_to_point(&self, offset: usize) -> Point {
 232        if offset >= self.summary().len {
 233            return self.summary().lines;
 234        }
 235        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 236        cursor.seek(&offset, Bias::Left, &());
 237        let overshoot = offset - cursor.start().0;
 238        cursor.start().1
 239            + cursor
 240                .item()
 241                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 242    }
 243
 244    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 245        if offset >= self.summary().len {
 246            return self.summary().lines_utf16();
 247        }
 248        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 249        cursor.seek(&offset, Bias::Left, &());
 250        let overshoot = offset - cursor.start().0;
 251        cursor.start().1
 252            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 253                chunk.offset_to_point_utf16(overshoot)
 254            })
 255    }
 256
 257    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 258        if point >= self.summary().lines {
 259            return self.summary().lines_utf16();
 260        }
 261        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 262        cursor.seek(&point, Bias::Left, &());
 263        let overshoot = point - cursor.start().0;
 264        cursor.start().1
 265            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 266                chunk.point_to_point_utf16(overshoot)
 267            })
 268    }
 269
 270    pub fn point_to_offset(&self, point: Point) -> usize {
 271        if point >= self.summary().lines {
 272            return self.summary().len;
 273        }
 274        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 275        cursor.seek(&point, Bias::Left, &());
 276        let overshoot = point - cursor.start().0;
 277        cursor.start().1
 278            + cursor
 279                .item()
 280                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 281    }
 282
 283    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 284        self.point_utf16_to_offset_impl(point, false)
 285    }
 286
 287    pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
 288        self.point_utf16_to_offset_impl(point.0, true)
 289    }
 290
 291    fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
 292        if point >= self.summary().lines_utf16() {
 293            return self.summary().len;
 294        }
 295        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 296        cursor.seek(&point, Bias::Left, &());
 297        let overshoot = point - cursor.start().0;
 298        cursor.start().1
 299            + cursor
 300                .item()
 301                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clip))
 302    }
 303
 304    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 305        if point.0 >= self.summary().lines_utf16() {
 306            return self.summary().lines;
 307        }
 308        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 309        cursor.seek(&point.0, Bias::Left, &());
 310        let overshoot = Unclipped(point.0 - cursor.start().0);
 311        cursor.start().1
 312            + cursor.item().map_or(Point::zero(), |chunk| {
 313                chunk.unclipped_point_utf16_to_point(overshoot)
 314            })
 315    }
 316
 317    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 318        let mut cursor = self.chunks.cursor::<usize>();
 319        cursor.seek(&offset, Bias::Left, &());
 320        if let Some(chunk) = cursor.item() {
 321            let mut ix = offset - cursor.start();
 322            while !chunk.0.is_char_boundary(ix) {
 323                match bias {
 324                    Bias::Left => {
 325                        ix -= 1;
 326                        offset -= 1;
 327                    }
 328                    Bias::Right => {
 329                        ix += 1;
 330                        offset += 1;
 331                    }
 332                }
 333            }
 334            offset
 335        } else {
 336            self.summary().len
 337        }
 338    }
 339
 340    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 341        let mut cursor = self.chunks.cursor::<OffsetUtf16>();
 342        cursor.seek(&offset, Bias::Right, &());
 343        if let Some(chunk) = cursor.item() {
 344            let overshoot = offset - cursor.start();
 345            *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
 346        } else {
 347            self.summary().len_utf16
 348        }
 349    }
 350
 351    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 352        let mut cursor = self.chunks.cursor::<Point>();
 353        cursor.seek(&point, Bias::Right, &());
 354        if let Some(chunk) = cursor.item() {
 355            let overshoot = point - cursor.start();
 356            *cursor.start() + chunk.clip_point(overshoot, bias)
 357        } else {
 358            self.summary().lines
 359        }
 360    }
 361
 362    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 363        let mut cursor = self.chunks.cursor::<PointUtf16>();
 364        cursor.seek(&point.0, Bias::Right, &());
 365        if let Some(chunk) = cursor.item() {
 366            let overshoot = Unclipped(point.0 - cursor.start());
 367            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 368        } else {
 369            self.summary().lines_utf16()
 370        }
 371    }
 372
 373    pub fn line_len(&self, row: u32) -> u32 {
 374        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 375            .column
 376    }
 377
 378    pub fn fingerprint(&self) -> RopeFingerprint {
 379        self.chunks.summary().fingerprint
 380    }
 381}
 382
 383impl<'a> From<&'a str> for Rope {
 384    fn from(text: &'a str) -> Self {
 385        let mut rope = Self::new();
 386        rope.push(text);
 387        rope
 388    }
 389}
 390
 391impl<'a> FromIterator<&'a str> for Rope {
 392    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
 393        let mut rope = Rope::new();
 394        for chunk in iter {
 395            rope.push(chunk);
 396        }
 397        rope
 398    }
 399}
 400
 401impl From<String> for Rope {
 402    fn from(text: String) -> Self {
 403        Rope::from(text.as_str())
 404    }
 405}
 406
 407impl fmt::Display for Rope {
 408    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 409        for chunk in self.chunks() {
 410            write!(f, "{}", chunk)?;
 411        }
 412        Ok(())
 413    }
 414}
 415
 416impl fmt::Debug for Rope {
 417    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 418        use std::fmt::Write as _;
 419
 420        write!(f, "\"")?;
 421        let mut format_string = String::new();
 422        for chunk in self.chunks() {
 423            write!(&mut format_string, "{:?}", chunk)?;
 424            write!(f, "{}", &format_string[1..format_string.len() - 1])?;
 425            format_string.clear();
 426        }
 427        write!(f, "\"")?;
 428        Ok(())
 429    }
 430}
 431
 432pub struct Cursor<'a> {
 433    rope: &'a Rope,
 434    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 435    offset: usize,
 436}
 437
 438impl<'a> Cursor<'a> {
 439    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 440        let mut chunks = rope.chunks.cursor();
 441        chunks.seek(&offset, Bias::Right, &());
 442        Self {
 443            rope,
 444            chunks,
 445            offset,
 446        }
 447    }
 448
 449    pub fn seek_forward(&mut self, end_offset: usize) {
 450        debug_assert!(end_offset >= self.offset);
 451
 452        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 453        self.offset = end_offset;
 454    }
 455
 456    pub fn slice(&mut self, end_offset: usize) -> Rope {
 457        debug_assert!(
 458            end_offset >= self.offset,
 459            "cannot slice backwards from {} to {}",
 460            self.offset,
 461            end_offset
 462        );
 463
 464        let mut slice = Rope::new();
 465        if let Some(start_chunk) = self.chunks.item() {
 466            let start_ix = self.offset - self.chunks.start();
 467            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 468            slice.push(&start_chunk.0[start_ix..end_ix]);
 469        }
 470
 471        if end_offset > self.chunks.end(&()) {
 472            self.chunks.next(&());
 473            slice.append(Rope {
 474                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 475            });
 476            if let Some(end_chunk) = self.chunks.item() {
 477                let end_ix = end_offset - self.chunks.start();
 478                slice.push(&end_chunk.0[..end_ix]);
 479            }
 480        }
 481
 482        self.offset = end_offset;
 483        slice
 484    }
 485
 486    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 487        debug_assert!(end_offset >= self.offset);
 488
 489        let mut summary = D::default();
 490        if let Some(start_chunk) = self.chunks.item() {
 491            let start_ix = self.offset - self.chunks.start();
 492            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 493            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 494                &start_chunk.0[start_ix..end_ix],
 495            )));
 496        }
 497
 498        if end_offset > self.chunks.end(&()) {
 499            self.chunks.next(&());
 500            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 501            if let Some(end_chunk) = self.chunks.item() {
 502                let end_ix = end_offset - self.chunks.start();
 503                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 504                    &end_chunk.0[..end_ix],
 505                )));
 506            }
 507        }
 508
 509        self.offset = end_offset;
 510        summary
 511    }
 512
 513    pub fn suffix(mut self) -> Rope {
 514        self.slice(self.rope.chunks.extent(&()))
 515    }
 516
 517    pub fn offset(&self) -> usize {
 518        self.offset
 519    }
 520}
 521
 522pub struct Chunks<'a> {
 523    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 524    range: Range<usize>,
 525    reversed: bool,
 526}
 527
 528impl<'a> Chunks<'a> {
 529    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 530        let mut chunks = rope.chunks.cursor();
 531        if reversed {
 532            chunks.seek(&range.end, Bias::Left, &());
 533        } else {
 534            chunks.seek(&range.start, Bias::Right, &());
 535        }
 536        Self {
 537            chunks,
 538            range,
 539            reversed,
 540        }
 541    }
 542
 543    pub fn offset(&self) -> usize {
 544        if self.reversed {
 545            self.range.end.min(self.chunks.end(&()))
 546        } else {
 547            self.range.start.max(*self.chunks.start())
 548        }
 549    }
 550
 551    pub fn seek(&mut self, offset: usize) {
 552        let bias = if self.reversed {
 553            Bias::Left
 554        } else {
 555            Bias::Right
 556        };
 557
 558        if offset >= self.chunks.end(&()) {
 559            self.chunks.seek_forward(&offset, bias, &());
 560        } else {
 561            self.chunks.seek(&offset, bias, &());
 562        }
 563
 564        if self.reversed {
 565            self.range.end = offset;
 566        } else {
 567            self.range.start = offset;
 568        }
 569    }
 570
 571    pub fn peek(&self) -> Option<&'a str> {
 572        let chunk = self.chunks.item()?;
 573        if self.reversed && self.range.start >= self.chunks.end(&()) {
 574            return None;
 575        }
 576        let chunk_start = *self.chunks.start();
 577        if self.range.end <= chunk_start {
 578            return None;
 579        }
 580
 581        let start = self.range.start.saturating_sub(chunk_start);
 582        let end = self.range.end - chunk_start;
 583        Some(&chunk.0[start..chunk.0.len().min(end)])
 584    }
 585}
 586
 587impl<'a> Iterator for Chunks<'a> {
 588    type Item = &'a str;
 589
 590    fn next(&mut self) -> Option<Self::Item> {
 591        let result = self.peek();
 592        if result.is_some() {
 593            if self.reversed {
 594                self.chunks.prev(&());
 595            } else {
 596                self.chunks.next(&());
 597            }
 598        }
 599        result
 600    }
 601}
 602
 603pub struct Bytes<'a> {
 604    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 605    range: Range<usize>,
 606    reversed: bool,
 607}
 608
 609impl<'a> Bytes<'a> {
 610    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 611        let mut chunks = rope.chunks.cursor();
 612        if reversed {
 613            chunks.seek(&range.end, Bias::Left, &());
 614        } else {
 615            chunks.seek(&range.start, Bias::Right, &());
 616        }
 617        Self {
 618            chunks,
 619            range,
 620            reversed,
 621        }
 622    }
 623
 624    pub fn peek(&self) -> Option<&'a [u8]> {
 625        let chunk = self.chunks.item()?;
 626        if self.reversed && self.range.start >= self.chunks.end(&()) {
 627            return None;
 628        }
 629        let chunk_start = *self.chunks.start();
 630        if self.range.end <= chunk_start {
 631            return None;
 632        }
 633        let start = self.range.start.saturating_sub(chunk_start);
 634        let end = self.range.end - chunk_start;
 635        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 636    }
 637}
 638
 639impl<'a> Iterator for Bytes<'a> {
 640    type Item = &'a [u8];
 641
 642    fn next(&mut self) -> Option<Self::Item> {
 643        let result = self.peek();
 644        if result.is_some() {
 645            if self.reversed {
 646                self.chunks.prev(&());
 647            } else {
 648                self.chunks.next(&());
 649            }
 650        }
 651        result
 652    }
 653}
 654
 655impl<'a> io::Read for Bytes<'a> {
 656    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 657        if let Some(chunk) = self.peek() {
 658            let len = cmp::min(buf.len(), chunk.len());
 659            if self.reversed {
 660                buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
 661                buf[..len].reverse();
 662                self.range.end -= len;
 663            } else {
 664                buf[..len].copy_from_slice(&chunk[..len]);
 665                self.range.start += len;
 666            }
 667
 668            if len == chunk.len() {
 669                if self.reversed {
 670                    self.chunks.prev(&());
 671                } else {
 672                    self.chunks.next(&());
 673                }
 674            }
 675            Ok(len)
 676        } else {
 677            Ok(0)
 678        }
 679    }
 680}
 681
 682#[derive(Clone, Debug, Default)]
 683struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 684
 685impl Chunk {
 686    fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
 687        let mut offset = 0;
 688        let mut offset_utf16 = OffsetUtf16(0);
 689        for ch in self.0.chars() {
 690            if offset >= target {
 691                break;
 692            }
 693
 694            offset += ch.len_utf8();
 695            offset_utf16.0 += ch.len_utf16();
 696        }
 697        offset_utf16
 698    }
 699
 700    fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 701        let mut offset_utf16 = OffsetUtf16(0);
 702        let mut offset = 0;
 703        for ch in self.0.chars() {
 704            if offset_utf16 >= target {
 705                break;
 706            }
 707
 708            offset += ch.len_utf8();
 709            offset_utf16.0 += ch.len_utf16();
 710        }
 711        offset
 712    }
 713
 714    fn offset_to_point(&self, target: usize) -> Point {
 715        let mut offset = 0;
 716        let mut point = Point::new(0, 0);
 717        for ch in self.0.chars() {
 718            if offset >= target {
 719                break;
 720            }
 721
 722            if ch == '\n' {
 723                point.row += 1;
 724                point.column = 0;
 725            } else {
 726                point.column += ch.len_utf8() as u32;
 727            }
 728            offset += ch.len_utf8();
 729        }
 730        point
 731    }
 732
 733    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 734        let mut offset = 0;
 735        let mut point = PointUtf16::new(0, 0);
 736        for ch in self.0.chars() {
 737            if offset >= target {
 738                break;
 739            }
 740
 741            if ch == '\n' {
 742                point.row += 1;
 743                point.column = 0;
 744            } else {
 745                point.column += ch.len_utf16() as u32;
 746            }
 747            offset += ch.len_utf8();
 748        }
 749        point
 750    }
 751
 752    fn point_to_offset(&self, target: Point) -> usize {
 753        let mut offset = 0;
 754        let mut point = Point::new(0, 0);
 755
 756        for ch in self.0.chars() {
 757            if point >= target {
 758                if point > target {
 759                    debug_panic!("point {target:?} is inside of character {ch:?}");
 760                }
 761                break;
 762            }
 763
 764            if ch == '\n' {
 765                point.row += 1;
 766                point.column = 0;
 767
 768                if point.row > target.row {
 769                    debug_panic!(
 770                        "point {target:?} is beyond the end of a line with length {}",
 771                        point.column
 772                    );
 773                    break;
 774                }
 775            } else {
 776                point.column += ch.len_utf8() as u32;
 777            }
 778
 779            offset += ch.len_utf8();
 780        }
 781
 782        offset
 783    }
 784
 785    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 786        let mut point = Point::zero();
 787        let mut point_utf16 = PointUtf16::new(0, 0);
 788        for ch in self.0.chars() {
 789            if point >= target {
 790                break;
 791            }
 792
 793            if ch == '\n' {
 794                point_utf16.row += 1;
 795                point_utf16.column = 0;
 796                point.row += 1;
 797                point.column = 0;
 798            } else {
 799                point_utf16.column += ch.len_utf16() as u32;
 800                point.column += ch.len_utf8() as u32;
 801            }
 802        }
 803        point_utf16
 804    }
 805
 806    fn point_utf16_to_offset(&self, target: PointUtf16, clip: bool) -> usize {
 807        let mut offset = 0;
 808        let mut point = PointUtf16::new(0, 0);
 809
 810        for ch in self.0.chars() {
 811            if point == target {
 812                break;
 813            }
 814
 815            if ch == '\n' {
 816                point.row += 1;
 817                point.column = 0;
 818
 819                if point.row > target.row {
 820                    if !clip {
 821                        debug_panic!(
 822                            "point {target:?} is beyond the end of a line with length {}",
 823                            point.column
 824                        );
 825                    }
 826                    // Return the offset of the newline
 827                    return offset;
 828                }
 829            } else {
 830                point.column += ch.len_utf16() as u32;
 831            }
 832
 833            if point > target {
 834                if !clip {
 835                    debug_panic!("point {target:?} is inside of codepoint {ch:?}");
 836                }
 837                // Return the offset of the codepoint which we have landed within, bias left
 838                return offset;
 839            }
 840
 841            offset += ch.len_utf8();
 842        }
 843
 844        offset
 845    }
 846
 847    fn unclipped_point_utf16_to_point(&self, target: Unclipped<PointUtf16>) -> Point {
 848        let mut point = Point::zero();
 849        let mut point_utf16 = PointUtf16::zero();
 850
 851        for ch in self.0.chars() {
 852            if point_utf16 == target.0 {
 853                break;
 854            }
 855
 856            if point_utf16 > target.0 {
 857                // If the point is past the end of a line or inside of a code point,
 858                // return the last valid point before the target.
 859                return point;
 860            }
 861
 862            if ch == '\n' {
 863                point_utf16 += PointUtf16::new(1, 0);
 864                point += Point::new(1, 0);
 865            } else {
 866                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 867                point += Point::new(0, ch.len_utf8() as u32);
 868            }
 869        }
 870
 871        point
 872    }
 873
 874    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 875        for (row, line) in self.0.split('\n').enumerate() {
 876            if row == target.row as usize {
 877                let mut column = target.column.min(line.len() as u32);
 878                while !line.is_char_boundary(column as usize) {
 879                    match bias {
 880                        Bias::Left => column -= 1,
 881                        Bias::Right => column += 1,
 882                    }
 883                }
 884                return Point::new(row as u32, column);
 885            }
 886        }
 887        unreachable!()
 888    }
 889
 890    fn clip_point_utf16(&self, target: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 891        for (row, line) in self.0.split('\n').enumerate() {
 892            if row == target.0.row as usize {
 893                let mut code_units = line.encode_utf16();
 894                let mut column = code_units.by_ref().take(target.0.column as usize).count();
 895                if char::decode_utf16(code_units).next().transpose().is_err() {
 896                    match bias {
 897                        Bias::Left => column -= 1,
 898                        Bias::Right => column += 1,
 899                    }
 900                }
 901                return PointUtf16::new(row as u32, column as u32);
 902            }
 903        }
 904        unreachable!()
 905    }
 906
 907    fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 908        let mut code_units = self.0.encode_utf16();
 909        let mut offset = code_units.by_ref().take(target.0).count();
 910        if char::decode_utf16(code_units).next().transpose().is_err() {
 911            match bias {
 912                Bias::Left => offset -= 1,
 913                Bias::Right => offset += 1,
 914            }
 915        }
 916        OffsetUtf16(offset)
 917    }
 918}
 919
 920impl sum_tree::Item for Chunk {
 921    type Summary = ChunkSummary;
 922
 923    fn summary(&self) -> Self::Summary {
 924        ChunkSummary::from(self.0.as_str())
 925    }
 926}
 927
 928#[derive(Clone, Debug, Default, Eq, PartialEq)]
 929pub struct ChunkSummary {
 930    text: TextSummary,
 931    fingerprint: RopeFingerprint,
 932}
 933
 934impl<'a> From<&'a str> for ChunkSummary {
 935    fn from(text: &'a str) -> Self {
 936        Self {
 937            text: TextSummary::from(text),
 938            fingerprint: Rope::text_fingerprint(text),
 939        }
 940    }
 941}
 942
 943impl sum_tree::Summary for ChunkSummary {
 944    type Context = ();
 945
 946    fn add_summary(&mut self, summary: &Self, _: &()) {
 947        self.text += &summary.text;
 948        self.fingerprint = self.fingerprint * summary.fingerprint;
 949    }
 950}
 951
 952#[derive(Clone, Debug, Default, Eq, PartialEq)]
 953pub struct TextSummary {
 954    pub len: usize,
 955    pub len_utf16: OffsetUtf16,
 956    pub lines: Point,
 957    pub first_line_chars: u32,
 958    pub last_line_chars: u32,
 959    pub last_line_len_utf16: u32,
 960    pub longest_row: u32,
 961    pub longest_row_chars: u32,
 962}
 963
 964impl TextSummary {
 965    pub fn lines_utf16(&self) -> PointUtf16 {
 966        PointUtf16 {
 967            row: self.lines.row,
 968            column: self.last_line_len_utf16,
 969        }
 970    }
 971}
 972
 973impl<'a> From<&'a str> for TextSummary {
 974    fn from(text: &'a str) -> Self {
 975        let mut len_utf16 = OffsetUtf16(0);
 976        let mut lines = Point::new(0, 0);
 977        let mut first_line_chars = 0;
 978        let mut last_line_chars = 0;
 979        let mut last_line_len_utf16 = 0;
 980        let mut longest_row = 0;
 981        let mut longest_row_chars = 0;
 982        for c in text.chars() {
 983            len_utf16.0 += c.len_utf16();
 984
 985            if c == '\n' {
 986                lines += Point::new(1, 0);
 987                last_line_len_utf16 = 0;
 988                last_line_chars = 0;
 989            } else {
 990                lines.column += c.len_utf8() as u32;
 991                last_line_len_utf16 += c.len_utf16() as u32;
 992                last_line_chars += 1;
 993            }
 994
 995            if lines.row == 0 {
 996                first_line_chars = last_line_chars;
 997            }
 998
 999            if last_line_chars > longest_row_chars {
1000                longest_row = lines.row;
1001                longest_row_chars = last_line_chars;
1002            }
1003        }
1004
1005        TextSummary {
1006            len: text.len(),
1007            len_utf16,
1008            lines,
1009            first_line_chars,
1010            last_line_chars,
1011            last_line_len_utf16,
1012            longest_row,
1013            longest_row_chars,
1014        }
1015    }
1016}
1017
1018impl sum_tree::Summary for TextSummary {
1019    type Context = ();
1020
1021    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
1022        *self += summary;
1023    }
1024}
1025
1026impl std::ops::Add<Self> for TextSummary {
1027    type Output = Self;
1028
1029    fn add(mut self, rhs: Self) -> Self::Output {
1030        AddAssign::add_assign(&mut self, &rhs);
1031        self
1032    }
1033}
1034
1035impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
1036    fn add_assign(&mut self, other: &'a Self) {
1037        let joined_chars = self.last_line_chars + other.first_line_chars;
1038        if joined_chars > self.longest_row_chars {
1039            self.longest_row = self.lines.row;
1040            self.longest_row_chars = joined_chars;
1041        }
1042        if other.longest_row_chars > self.longest_row_chars {
1043            self.longest_row = self.lines.row + other.longest_row;
1044            self.longest_row_chars = other.longest_row_chars;
1045        }
1046
1047        if self.lines.row == 0 {
1048            self.first_line_chars += other.first_line_chars;
1049        }
1050
1051        if other.lines.row == 0 {
1052            self.last_line_chars += other.first_line_chars;
1053            self.last_line_len_utf16 += other.last_line_len_utf16;
1054        } else {
1055            self.last_line_chars = other.last_line_chars;
1056            self.last_line_len_utf16 = other.last_line_len_utf16;
1057        }
1058
1059        self.len += other.len;
1060        self.len_utf16 += other.len_utf16;
1061        self.lines += other.lines;
1062    }
1063}
1064
1065impl std::ops::AddAssign<Self> for TextSummary {
1066    fn add_assign(&mut self, other: Self) {
1067        *self += &other;
1068    }
1069}
1070
1071pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
1072    fn from_text_summary(summary: &TextSummary) -> Self;
1073    fn add_assign(&mut self, other: &Self);
1074}
1075
1076impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
1077    fn from_text_summary(summary: &TextSummary) -> Self {
1078        (
1079            D1::from_text_summary(summary),
1080            D2::from_text_summary(summary),
1081        )
1082    }
1083
1084    fn add_assign(&mut self, other: &Self) {
1085        self.0.add_assign(&other.0);
1086        self.1.add_assign(&other.1);
1087    }
1088}
1089
1090impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1091    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1092        *self += &summary.text;
1093    }
1094}
1095
1096impl TextDimension for TextSummary {
1097    fn from_text_summary(summary: &TextSummary) -> Self {
1098        summary.clone()
1099    }
1100
1101    fn add_assign(&mut self, other: &Self) {
1102        *self += other;
1103    }
1104}
1105
1106impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1107    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1108        *self += summary.text.len;
1109    }
1110}
1111
1112impl TextDimension for usize {
1113    fn from_text_summary(summary: &TextSummary) -> Self {
1114        summary.len
1115    }
1116
1117    fn add_assign(&mut self, other: &Self) {
1118        *self += other;
1119    }
1120}
1121
1122impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1123    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1124        *self += summary.text.len_utf16;
1125    }
1126}
1127
1128impl TextDimension for OffsetUtf16 {
1129    fn from_text_summary(summary: &TextSummary) -> Self {
1130        summary.len_utf16
1131    }
1132
1133    fn add_assign(&mut self, other: &Self) {
1134        *self += other;
1135    }
1136}
1137
1138impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1139    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1140        *self += summary.text.lines;
1141    }
1142}
1143
1144impl TextDimension for Point {
1145    fn from_text_summary(summary: &TextSummary) -> Self {
1146        summary.lines
1147    }
1148
1149    fn add_assign(&mut self, other: &Self) {
1150        *self += other;
1151    }
1152}
1153
1154impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1155    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1156        *self += summary.text.lines_utf16();
1157    }
1158}
1159
1160impl TextDimension for PointUtf16 {
1161    fn from_text_summary(summary: &TextSummary) -> Self {
1162        summary.lines_utf16()
1163    }
1164
1165    fn add_assign(&mut self, other: &Self) {
1166        *self += other;
1167    }
1168}
1169
1170fn find_split_ix(text: &str) -> usize {
1171    let mut ix = text.len() / 2;
1172    while !text.is_char_boundary(ix) {
1173        if ix < 2 * CHUNK_BASE {
1174            ix += 1;
1175        } else {
1176            ix = (text.len() / 2) - 1;
1177            break;
1178        }
1179    }
1180    while !text.is_char_boundary(ix) {
1181        ix -= 1;
1182    }
1183
1184    debug_assert!(ix <= 2 * CHUNK_BASE);
1185    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
1186    ix
1187}
1188
1189#[cfg(test)]
1190mod tests {
1191    use super::*;
1192    use rand::prelude::*;
1193    use std::{cmp::Ordering, env, io::Read};
1194    use util::RandomCharIter;
1195    use Bias::{Left, Right};
1196
1197    #[test]
1198    fn test_all_4_byte_chars() {
1199        let mut rope = Rope::new();
1200        let text = "🏀".repeat(256);
1201        rope.push(&text);
1202        assert_eq!(rope.text(), text);
1203    }
1204
1205    #[test]
1206    fn test_clip() {
1207        let rope = Rope::from("🧘");
1208
1209        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1210        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1211        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1212
1213        assert_eq!(
1214            rope.clip_point(Point::new(0, 1), Bias::Left),
1215            Point::new(0, 0)
1216        );
1217        assert_eq!(
1218            rope.clip_point(Point::new(0, 1), Bias::Right),
1219            Point::new(0, 4)
1220        );
1221        assert_eq!(
1222            rope.clip_point(Point::new(0, 5), Bias::Right),
1223            Point::new(0, 4)
1224        );
1225
1226        assert_eq!(
1227            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1228            PointUtf16::new(0, 0)
1229        );
1230        assert_eq!(
1231            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1232            PointUtf16::new(0, 2)
1233        );
1234        assert_eq!(
1235            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1236            PointUtf16::new(0, 2)
1237        );
1238
1239        assert_eq!(
1240            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1241            OffsetUtf16(0)
1242        );
1243        assert_eq!(
1244            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1245            OffsetUtf16(2)
1246        );
1247        assert_eq!(
1248            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1249            OffsetUtf16(2)
1250        );
1251    }
1252
1253    #[gpui::test(iterations = 100)]
1254    fn test_random_rope(mut rng: StdRng) {
1255        let operations = env::var("OPERATIONS")
1256            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1257            .unwrap_or(10);
1258
1259        let mut expected = String::new();
1260        let mut actual = Rope::new();
1261        for _ in 0..operations {
1262            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1263            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1264            let len = rng.gen_range(0..=64);
1265            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1266
1267            let mut new_actual = Rope::new();
1268            let mut cursor = actual.cursor(0);
1269            new_actual.append(cursor.slice(start_ix));
1270            new_actual.push(&new_text);
1271            cursor.seek_forward(end_ix);
1272            new_actual.append(cursor.suffix());
1273            actual = new_actual;
1274
1275            expected.replace_range(start_ix..end_ix, &new_text);
1276
1277            assert_eq!(actual.text(), expected);
1278            log::info!("text: {:?}", expected);
1279
1280            for _ in 0..5 {
1281                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1282                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1283
1284                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1285                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1286
1287                let mut actual_text = String::new();
1288                actual
1289                    .bytes_in_range(start_ix..end_ix)
1290                    .read_to_string(&mut actual_text)
1291                    .unwrap();
1292                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1293
1294                assert_eq!(
1295                    actual
1296                        .reversed_chunks_in_range(start_ix..end_ix)
1297                        .collect::<Vec<&str>>()
1298                        .into_iter()
1299                        .rev()
1300                        .collect::<String>(),
1301                    &expected[start_ix..end_ix]
1302                );
1303            }
1304
1305            let mut offset_utf16 = OffsetUtf16(0);
1306            let mut point = Point::new(0, 0);
1307            let mut point_utf16 = PointUtf16::new(0, 0);
1308            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1309                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1310                assert_eq!(
1311                    actual.offset_to_point_utf16(ix),
1312                    point_utf16,
1313                    "offset_to_point_utf16({})",
1314                    ix
1315                );
1316                assert_eq!(
1317                    actual.point_to_offset(point),
1318                    ix,
1319                    "point_to_offset({:?})",
1320                    point
1321                );
1322                assert_eq!(
1323                    actual.point_utf16_to_offset(point_utf16),
1324                    ix,
1325                    "point_utf16_to_offset({:?})",
1326                    point_utf16
1327                );
1328                assert_eq!(
1329                    actual.offset_to_offset_utf16(ix),
1330                    offset_utf16,
1331                    "offset_to_offset_utf16({:?})",
1332                    ix
1333                );
1334                assert_eq!(
1335                    actual.offset_utf16_to_offset(offset_utf16),
1336                    ix,
1337                    "offset_utf16_to_offset({:?})",
1338                    offset_utf16
1339                );
1340                if ch == '\n' {
1341                    point += Point::new(1, 0);
1342                    point_utf16 += PointUtf16::new(1, 0);
1343                } else {
1344                    point.column += ch.len_utf8() as u32;
1345                    point_utf16.column += ch.len_utf16() as u32;
1346                }
1347                offset_utf16.0 += ch.len_utf16();
1348            }
1349
1350            let mut offset_utf16 = OffsetUtf16(0);
1351            let mut point_utf16 = Unclipped(PointUtf16::zero());
1352            for unit in expected.encode_utf16() {
1353                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1354                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1355                assert!(right_offset >= left_offset);
1356                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1357                actual.offset_utf16_to_offset(left_offset);
1358                actual.offset_utf16_to_offset(right_offset);
1359
1360                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1361                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1362                assert!(right_point >= left_point);
1363                // Ensure translating valid UTF-16 points to offsets doesn't panic.
1364                actual.point_utf16_to_offset(left_point);
1365                actual.point_utf16_to_offset(right_point);
1366
1367                offset_utf16.0 += 1;
1368                if unit == b'\n' as u16 {
1369                    point_utf16.0 += PointUtf16::new(1, 0);
1370                } else {
1371                    point_utf16.0 += PointUtf16::new(0, 1);
1372                }
1373            }
1374
1375            for _ in 0..5 {
1376                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1377                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1378                assert_eq!(
1379                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1380                    TextSummary::from(&expected[start_ix..end_ix])
1381                );
1382            }
1383
1384            let mut expected_longest_rows = Vec::new();
1385            let mut longest_line_len = -1_isize;
1386            for (row, line) in expected.split('\n').enumerate() {
1387                let row = row as u32;
1388                assert_eq!(
1389                    actual.line_len(row),
1390                    line.len() as u32,
1391                    "invalid line len for row {}",
1392                    row
1393                );
1394
1395                let line_char_count = line.chars().count() as isize;
1396                match line_char_count.cmp(&longest_line_len) {
1397                    Ordering::Less => {}
1398                    Ordering::Equal => expected_longest_rows.push(row),
1399                    Ordering::Greater => {
1400                        longest_line_len = line_char_count;
1401                        expected_longest_rows.clear();
1402                        expected_longest_rows.push(row);
1403                    }
1404                }
1405            }
1406
1407            let longest_row = actual.summary().longest_row;
1408            assert!(
1409                expected_longest_rows.contains(&longest_row),
1410                "incorrect longest row {}. expected {:?} with length {}",
1411                longest_row,
1412                expected_longest_rows,
1413                longest_line_len,
1414            );
1415        }
1416    }
1417
1418    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1419        while !text.is_char_boundary(offset) {
1420            match bias {
1421                Bias::Left => offset -= 1,
1422                Bias::Right => offset += 1,
1423            }
1424        }
1425        offset
1426    }
1427
1428    impl Rope {
1429        fn text(&self) -> String {
1430            let mut text = String::new();
1431            for chunk in self.chunks.cursor::<()>() {
1432                text.push_str(&chunk.0);
1433            }
1434            text
1435        }
1436    }
1437}