rope.rs

   1use crate::PointUtf16;
   2
   3use super::Point;
   4use arrayvec::ArrayString;
   5use bromberg_sl2::{DigestString, HashMatrix};
   6use smallvec::SmallVec;
   7use std::{cmp, fmt, io, mem, ops::Range, str};
   8use sum_tree::{Bias, Dimension, SumTree};
   9
  10#[cfg(test)]
  11const CHUNK_BASE: usize = 6;
  12
  13#[cfg(not(test))]
  14const CHUNK_BASE: usize = 16;
  15
  16#[derive(Clone, Default, Debug)]
  17pub struct Rope {
  18    chunks: SumTree<Chunk>,
  19}
  20
  21impl Rope {
  22    pub fn new() -> Self {
  23        Self::default()
  24    }
  25
  26    pub fn append(&mut self, rope: Rope) {
  27        let mut chunks = rope.chunks.cursor::<()>();
  28        chunks.next(&());
  29        if let Some(chunk) = chunks.item() {
  30            if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
  31                || chunk.0.len() < CHUNK_BASE
  32            {
  33                self.push(&chunk.0);
  34                chunks.next(&());
  35            }
  36        }
  37
  38        self.chunks.push_tree(chunks.suffix(&()), &());
  39        self.check_invariants();
  40    }
  41
  42    pub fn replace(&mut self, range: Range<usize>, text: &str) {
  43        let mut new_rope = Rope::new();
  44        let mut cursor = self.cursor(0);
  45        new_rope.append(cursor.slice(range.start));
  46        cursor.seek_forward(range.end);
  47        new_rope.push(text);
  48        new_rope.append(cursor.suffix());
  49        *self = new_rope;
  50    }
  51
  52    pub fn slice(&self, range: Range<usize>) -> Rope {
  53        let mut cursor = self.cursor(0);
  54        cursor.seek_forward(range.start);
  55        cursor.slice(range.end)
  56    }
  57
  58    pub fn push(&mut self, text: &str) {
  59        let mut new_chunks = SmallVec::<[_; 16]>::new();
  60        let mut new_chunk = ArrayString::new();
  61        for ch in text.chars() {
  62            if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
  63                new_chunks.push(Chunk(new_chunk));
  64                new_chunk = ArrayString::new();
  65            }
  66
  67            new_chunk.push(ch);
  68        }
  69        if !new_chunk.is_empty() {
  70            new_chunks.push(Chunk(new_chunk));
  71        }
  72
  73        let mut new_chunks = new_chunks.into_iter();
  74        let mut first_new_chunk = new_chunks.next();
  75        self.chunks.update_last(
  76            |last_chunk| {
  77                if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
  78                    if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
  79                        last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
  80                    } else {
  81                        let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
  82                        text.push_str(&last_chunk.0);
  83                        text.push_str(&first_new_chunk_ref.0);
  84                        let (left, right) = text.split_at(find_split_ix(&text));
  85                        last_chunk.0.clear();
  86                        last_chunk.0.push_str(left);
  87                        first_new_chunk_ref.0.clear();
  88                        first_new_chunk_ref.0.push_str(right);
  89                    }
  90                }
  91            },
  92            &(),
  93        );
  94
  95        self.chunks
  96            .extend(first_new_chunk.into_iter().chain(new_chunks), &());
  97        self.check_invariants();
  98    }
  99
 100    pub fn push_front(&mut self, text: &str) {
 101        let suffix = mem::replace(self, Rope::from(text));
 102        self.append(suffix);
 103    }
 104
 105    fn check_invariants(&self) {
 106        #[cfg(test)]
 107        {
 108            // Ensure all chunks except maybe the last one are not underflowing.
 109            // Allow some wiggle room for multibyte characters at chunk boundaries.
 110            let mut chunks = self.chunks.cursor::<()>().peekable();
 111            while let Some(chunk) = chunks.next() {
 112                if chunks.peek().is_some() {
 113                    assert!(chunk.0.len() + 3 >= CHUNK_BASE);
 114                }
 115            }
 116        }
 117    }
 118
 119    pub fn summary(&self) -> TextSummary {
 120        self.chunks.summary().text.clone()
 121    }
 122
 123    pub fn len(&self) -> usize {
 124        self.chunks.extent(&())
 125    }
 126
 127    pub fn max_point(&self) -> Point {
 128        self.chunks.extent(&())
 129    }
 130
 131    pub fn max_point_utf16(&self) -> PointUtf16 {
 132        self.chunks.extent(&())
 133    }
 134
 135    pub fn cursor(&self, offset: usize) -> Cursor {
 136        Cursor::new(self, offset)
 137    }
 138
 139    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 140        self.chars_at(0)
 141    }
 142
 143    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 144        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 145    }
 146
 147    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 148        self.reversed_chunks_in_range(0..start)
 149            .flat_map(|chunk| chunk.chars().rev())
 150    }
 151
 152    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
 153        Bytes::new(self, range)
 154    }
 155
 156    pub fn chunks<'a>(&'a self) -> Chunks<'a> {
 157        self.chunks_in_range(0..self.len())
 158    }
 159
 160    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
 161        Chunks::new(self, range, false)
 162    }
 163
 164    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
 165        Chunks::new(self, range, true)
 166    }
 167
 168    pub fn offset_to_point(&self, offset: usize) -> Point {
 169        if offset >= self.summary().bytes {
 170            return self.summary().lines;
 171        }
 172        let mut cursor = self.chunks.cursor::<(usize, Point)>();
 173        cursor.seek(&offset, Bias::Left, &());
 174        let overshoot = offset - cursor.start().0;
 175        cursor.start().1
 176            + cursor
 177                .item()
 178                .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
 179    }
 180
 181    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 182        if offset >= self.summary().bytes {
 183            return self.summary().lines_utf16;
 184        }
 185        let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
 186        cursor.seek(&offset, Bias::Left, &());
 187        let overshoot = offset - cursor.start().0;
 188        cursor.start().1
 189            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 190                chunk.offset_to_point_utf16(overshoot)
 191            })
 192    }
 193
 194    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 195        if point >= self.summary().lines {
 196            return self.summary().lines_utf16;
 197        }
 198        let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
 199        cursor.seek(&point, Bias::Left, &());
 200        let overshoot = point - cursor.start().0;
 201        cursor.start().1
 202            + cursor.item().map_or(PointUtf16::zero(), |chunk| {
 203                chunk.point_to_point_utf16(overshoot)
 204            })
 205    }
 206
 207    pub fn point_to_offset(&self, point: Point) -> usize {
 208        if point >= self.summary().lines {
 209            return self.summary().bytes;
 210        }
 211        let mut cursor = self.chunks.cursor::<(Point, usize)>();
 212        cursor.seek(&point, Bias::Left, &());
 213        let overshoot = point - cursor.start().0;
 214        cursor.start().1
 215            + cursor
 216                .item()
 217                .map_or(0, |chunk| chunk.point_to_offset(overshoot))
 218    }
 219
 220    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 221        if point >= self.summary().lines_utf16 {
 222            return self.summary().bytes;
 223        }
 224        let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
 225        cursor.seek(&point, Bias::Left, &());
 226        let overshoot = point - cursor.start().0;
 227        cursor.start().1
 228            + cursor
 229                .item()
 230                .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
 231    }
 232
 233    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 234        if point >= self.summary().lines_utf16 {
 235            return self.summary().lines;
 236        }
 237        let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
 238        cursor.seek(&point, Bias::Left, &());
 239        let overshoot = point - cursor.start().0;
 240        cursor.start().1
 241            + cursor
 242                .item()
 243                .map_or(Point::zero(), |chunk| chunk.point_utf16_to_point(overshoot))
 244    }
 245
 246    pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
 247        let mut cursor = self.chunks.cursor::<usize>();
 248        cursor.seek(&offset, Bias::Left, &());
 249        if let Some(chunk) = cursor.item() {
 250            let mut ix = offset - cursor.start();
 251            while !chunk.0.is_char_boundary(ix) {
 252                match bias {
 253                    Bias::Left => {
 254                        ix -= 1;
 255                        offset -= 1;
 256                    }
 257                    Bias::Right => {
 258                        ix += 1;
 259                        offset += 1;
 260                    }
 261                }
 262            }
 263            offset
 264        } else {
 265            self.summary().bytes
 266        }
 267    }
 268
 269    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 270        let mut cursor = self.chunks.cursor::<Point>();
 271        cursor.seek(&point, Bias::Right, &());
 272        if let Some(chunk) = cursor.item() {
 273            let overshoot = point - cursor.start();
 274            *cursor.start() + chunk.clip_point(overshoot, bias)
 275        } else {
 276            self.summary().lines
 277        }
 278    }
 279
 280    pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 {
 281        let mut cursor = self.chunks.cursor::<PointUtf16>();
 282        cursor.seek(&point, Bias::Right, &());
 283        if let Some(chunk) = cursor.item() {
 284            let overshoot = point - cursor.start();
 285            *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
 286        } else {
 287            self.summary().lines_utf16
 288        }
 289    }
 290
 291    pub fn line_len(&self, row: u32) -> u32 {
 292        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 293            .column
 294    }
 295
 296    pub fn fingerprint(&self) -> String {
 297        self.chunks.summary().fingerprint.to_hex()
 298    }
 299}
 300
 301impl<'a> From<&'a str> for Rope {
 302    fn from(text: &'a str) -> Self {
 303        let mut rope = Self::new();
 304        rope.push(text);
 305        rope
 306    }
 307}
 308
 309impl fmt::Display for Rope {
 310    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 311        for chunk in self.chunks() {
 312            write!(f, "{}", chunk)?;
 313        }
 314        Ok(())
 315    }
 316}
 317
 318pub struct Cursor<'a> {
 319    rope: &'a Rope,
 320    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 321    offset: usize,
 322}
 323
 324impl<'a> Cursor<'a> {
 325    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 326        let mut chunks = rope.chunks.cursor();
 327        chunks.seek(&offset, Bias::Right, &());
 328        Self {
 329            rope,
 330            chunks,
 331            offset,
 332        }
 333    }
 334
 335    pub fn seek_forward(&mut self, end_offset: usize) {
 336        debug_assert!(end_offset >= self.offset);
 337
 338        self.chunks.seek_forward(&end_offset, Bias::Right, &());
 339        self.offset = end_offset;
 340    }
 341
 342    pub fn slice(&mut self, end_offset: usize) -> Rope {
 343        debug_assert!(
 344            end_offset >= self.offset,
 345            "cannot slice backwards from {} to {}",
 346            self.offset,
 347            end_offset
 348        );
 349
 350        let mut slice = Rope::new();
 351        if let Some(start_chunk) = self.chunks.item() {
 352            let start_ix = self.offset - self.chunks.start();
 353            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 354            slice.push(&start_chunk.0[start_ix..end_ix]);
 355        }
 356
 357        if end_offset > self.chunks.end(&()) {
 358            self.chunks.next(&());
 359            slice.append(Rope {
 360                chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
 361            });
 362            if let Some(end_chunk) = self.chunks.item() {
 363                let end_ix = end_offset - self.chunks.start();
 364                slice.push(&end_chunk.0[..end_ix]);
 365            }
 366        }
 367
 368        self.offset = end_offset;
 369        slice
 370    }
 371
 372    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 373        debug_assert!(end_offset >= self.offset);
 374
 375        let mut summary = D::default();
 376        if let Some(start_chunk) = self.chunks.item() {
 377            let start_ix = self.offset - self.chunks.start();
 378            let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
 379            summary.add_assign(&D::from_text_summary(&TextSummary::from(
 380                &start_chunk.0[start_ix..end_ix],
 381            )));
 382        }
 383
 384        if end_offset > self.chunks.end(&()) {
 385            self.chunks.next(&());
 386            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
 387            if let Some(end_chunk) = self.chunks.item() {
 388                let end_ix = end_offset - self.chunks.start();
 389                summary.add_assign(&D::from_text_summary(&TextSummary::from(
 390                    &end_chunk.0[..end_ix],
 391                )));
 392            }
 393        }
 394
 395        self.offset = end_offset;
 396        summary
 397    }
 398
 399    pub fn suffix(mut self) -> Rope {
 400        self.slice(self.rope.chunks.extent(&()))
 401    }
 402
 403    pub fn offset(&self) -> usize {
 404        self.offset
 405    }
 406}
 407
 408pub struct Chunks<'a> {
 409    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 410    range: Range<usize>,
 411    reversed: bool,
 412}
 413
 414impl<'a> Chunks<'a> {
 415    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 416        let mut chunks = rope.chunks.cursor();
 417        if reversed {
 418            chunks.seek(&range.end, Bias::Left, &());
 419        } else {
 420            chunks.seek(&range.start, Bias::Right, &());
 421        }
 422        Self {
 423            chunks,
 424            range,
 425            reversed,
 426        }
 427    }
 428
 429    pub fn offset(&self) -> usize {
 430        if self.reversed {
 431            self.range.end.min(self.chunks.end(&()))
 432        } else {
 433            self.range.start.max(*self.chunks.start())
 434        }
 435    }
 436
 437    pub fn seek(&mut self, offset: usize) {
 438        let bias = if self.reversed {
 439            Bias::Left
 440        } else {
 441            Bias::Right
 442        };
 443
 444        if offset >= self.chunks.end(&()) {
 445            self.chunks.seek_forward(&offset, bias, &());
 446        } else {
 447            self.chunks.seek(&offset, bias, &());
 448        }
 449
 450        if self.reversed {
 451            self.range.end = offset;
 452        } else {
 453            self.range.start = offset;
 454        }
 455    }
 456
 457    pub fn peek(&self) -> Option<&'a str> {
 458        let chunk = self.chunks.item()?;
 459        if self.reversed && self.range.start >= self.chunks.end(&()) {
 460            return None;
 461        }
 462        let chunk_start = *self.chunks.start();
 463        if self.range.end <= chunk_start {
 464            return None;
 465        }
 466
 467        let start = self.range.start.saturating_sub(chunk_start);
 468        let end = self.range.end - chunk_start;
 469        Some(&chunk.0[start..chunk.0.len().min(end)])
 470    }
 471}
 472
 473impl<'a> Iterator for Chunks<'a> {
 474    type Item = &'a str;
 475
 476    fn next(&mut self) -> Option<Self::Item> {
 477        let result = self.peek();
 478        if result.is_some() {
 479            if self.reversed {
 480                self.chunks.prev(&());
 481            } else {
 482                self.chunks.next(&());
 483            }
 484        }
 485        result
 486    }
 487}
 488
 489pub struct Bytes<'a> {
 490    chunks: sum_tree::Cursor<'a, Chunk, usize>,
 491    range: Range<usize>,
 492}
 493
 494impl<'a> Bytes<'a> {
 495    pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
 496        let mut chunks = rope.chunks.cursor();
 497        chunks.seek(&range.start, Bias::Right, &());
 498        Self { chunks, range }
 499    }
 500
 501    pub fn peek(&self) -> Option<&'a [u8]> {
 502        let chunk = self.chunks.item()?;
 503        let chunk_start = *self.chunks.start();
 504        if self.range.end <= chunk_start {
 505            return None;
 506        }
 507
 508        let start = self.range.start.saturating_sub(chunk_start);
 509        let end = self.range.end - chunk_start;
 510        Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
 511    }
 512}
 513
 514impl<'a> Iterator for Bytes<'a> {
 515    type Item = &'a [u8];
 516
 517    fn next(&mut self) -> Option<Self::Item> {
 518        let result = self.peek();
 519        if result.is_some() {
 520            self.chunks.next(&());
 521        }
 522        result
 523    }
 524}
 525
 526impl<'a> io::Read for Bytes<'a> {
 527    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
 528        if let Some(chunk) = self.peek() {
 529            let len = cmp::min(buf.len(), chunk.len());
 530            buf[..len].copy_from_slice(&chunk[..len]);
 531            self.range.start += len;
 532            if len == chunk.len() {
 533                self.chunks.next(&());
 534            }
 535            Ok(len)
 536        } else {
 537            Ok(0)
 538        }
 539    }
 540}
 541
 542#[derive(Clone, Debug, Default)]
 543struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 544
 545impl Chunk {
 546    fn offset_to_point(&self, target: usize) -> Point {
 547        let mut offset = 0;
 548        let mut point = Point::new(0, 0);
 549        for ch in self.0.chars() {
 550            if offset >= target {
 551                break;
 552            }
 553
 554            if ch == '\n' {
 555                point.row += 1;
 556                point.column = 0;
 557            } else {
 558                point.column += ch.len_utf8() as u32;
 559            }
 560            offset += ch.len_utf8();
 561        }
 562        point
 563    }
 564
 565    fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
 566        let mut offset = 0;
 567        let mut point = PointUtf16::new(0, 0);
 568        for ch in self.0.chars() {
 569            if offset >= target {
 570                break;
 571            }
 572
 573            if ch == '\n' {
 574                point.row += 1;
 575                point.column = 0;
 576            } else {
 577                point.column += ch.len_utf16() as u32;
 578            }
 579            offset += ch.len_utf8();
 580        }
 581        point
 582    }
 583
 584    fn point_to_offset(&self, target: Point) -> usize {
 585        let mut offset = 0;
 586        let mut point = Point::new(0, 0);
 587        for ch in self.0.chars() {
 588            if point >= target {
 589                if point > target {
 590                    panic!("point {:?} is inside of character {:?}", target, ch);
 591                }
 592                break;
 593            }
 594
 595            if ch == '\n' {
 596                point.row += 1;
 597                if point.row > target.row {
 598                    panic!(
 599                        "point {:?} is beyond the end of a line with length {}",
 600                        target, point.column
 601                    );
 602                }
 603                point.column = 0;
 604            } else {
 605                point.column += ch.len_utf8() as u32;
 606            }
 607            offset += ch.len_utf8();
 608        }
 609        offset
 610    }
 611
 612    fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
 613        let mut point = Point::zero();
 614        let mut point_utf16 = PointUtf16::new(0, 0);
 615        for ch in self.0.chars() {
 616            if point >= target {
 617                break;
 618            }
 619
 620            if ch == '\n' {
 621                point_utf16.row += 1;
 622                point_utf16.column = 0;
 623                point.row += 1;
 624                point.column = 0;
 625            } else {
 626                point_utf16.column += ch.len_utf16() as u32;
 627                point.column += ch.len_utf8() as u32;
 628            }
 629        }
 630        point_utf16
 631    }
 632
 633    fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
 634        let mut offset = 0;
 635        let mut point = PointUtf16::new(0, 0);
 636        for ch in self.0.chars() {
 637            if point >= target {
 638                if point > target {
 639                    panic!("point {:?} is inside of character {:?}", target, ch);
 640                }
 641                break;
 642            }
 643
 644            if ch == '\n' {
 645                point.row += 1;
 646                if point.row > target.row {
 647                    panic!(
 648                        "point {:?} is beyond the end of a line with length {}",
 649                        target, point.column
 650                    );
 651                }
 652                point.column = 0;
 653            } else {
 654                point.column += ch.len_utf16() as u32;
 655            }
 656            offset += ch.len_utf8();
 657        }
 658        offset
 659    }
 660
 661    fn point_utf16_to_point(&self, target: PointUtf16) -> Point {
 662        let mut point = Point::zero();
 663        let mut point_utf16 = PointUtf16::zero();
 664        for ch in self.0.chars() {
 665            if point_utf16 >= target {
 666                if point_utf16 > target {
 667                    panic!("point {:?} is inside of character {:?}", target, ch);
 668                }
 669                break;
 670            }
 671
 672            if ch == '\n' {
 673                point_utf16 += PointUtf16::new(1, 0);
 674                point += Point::new(1, 0);
 675            } else {
 676                point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
 677                point += Point::new(0, ch.len_utf8() as u32);
 678            }
 679        }
 680        point
 681    }
 682
 683    fn clip_point(&self, target: Point, bias: Bias) -> Point {
 684        for (row, line) in self.0.split('\n').enumerate() {
 685            if row == target.row as usize {
 686                let mut column = target.column.min(line.len() as u32);
 687                while !line.is_char_boundary(column as usize) {
 688                    match bias {
 689                        Bias::Left => column -= 1,
 690                        Bias::Right => column += 1,
 691                    }
 692                }
 693                return Point::new(row as u32, column);
 694            }
 695        }
 696        unreachable!()
 697    }
 698
 699    fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 {
 700        for (row, line) in self.0.split('\n').enumerate() {
 701            if row == target.row as usize {
 702                let mut code_units = line.encode_utf16();
 703                let mut column = code_units.by_ref().take(target.column as usize).count();
 704                if char::decode_utf16(code_units).next().transpose().is_err() {
 705                    match bias {
 706                        Bias::Left => column -= 1,
 707                        Bias::Right => column += 1,
 708                    }
 709                }
 710                return PointUtf16::new(row as u32, column as u32);
 711            }
 712        }
 713        unreachable!()
 714    }
 715}
 716
 717impl sum_tree::Item for Chunk {
 718    type Summary = ChunkSummary;
 719
 720    fn summary(&self) -> Self::Summary {
 721        ChunkSummary::from(self.0.as_str())
 722    }
 723}
 724
 725#[derive(Clone, Debug, Default, Eq, PartialEq)]
 726pub struct ChunkSummary {
 727    text: TextSummary,
 728    fingerprint: HashMatrix,
 729}
 730
 731impl<'a> From<&'a str> for ChunkSummary {
 732    fn from(text: &'a str) -> Self {
 733        Self {
 734            text: TextSummary::from(text),
 735            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
 736        }
 737    }
 738}
 739
 740impl sum_tree::Summary for ChunkSummary {
 741    type Context = ();
 742
 743    fn add_summary(&mut self, summary: &Self, _: &()) {
 744        self.text += &summary.text;
 745        self.fingerprint = self.fingerprint * summary.fingerprint;
 746    }
 747}
 748
 749#[derive(Clone, Debug, Default, Eq, PartialEq)]
 750pub struct TextSummary {
 751    pub bytes: usize,
 752    pub lines: Point,
 753    pub lines_utf16: PointUtf16,
 754    pub first_line_chars: u32,
 755    pub last_line_chars: u32,
 756    pub longest_row: u32,
 757    pub longest_row_chars: u32,
 758}
 759
 760impl<'a> From<&'a str> for TextSummary {
 761    fn from(text: &'a str) -> Self {
 762        let mut lines = Point::new(0, 0);
 763        let mut lines_utf16 = PointUtf16::new(0, 0);
 764        let mut first_line_chars = 0;
 765        let mut last_line_chars = 0;
 766        let mut longest_row = 0;
 767        let mut longest_row_chars = 0;
 768        for c in text.chars() {
 769            if c == '\n' {
 770                lines += Point::new(1, 0);
 771                lines_utf16 += PointUtf16::new(1, 0);
 772                last_line_chars = 0;
 773            } else {
 774                lines.column += c.len_utf8() as u32;
 775                lines_utf16.column += c.len_utf16() as u32;
 776                last_line_chars += 1;
 777            }
 778
 779            if lines.row == 0 {
 780                first_line_chars = last_line_chars;
 781            }
 782
 783            if last_line_chars > longest_row_chars {
 784                longest_row = lines.row;
 785                longest_row_chars = last_line_chars;
 786            }
 787        }
 788
 789        TextSummary {
 790            bytes: text.len(),
 791            lines,
 792            lines_utf16,
 793            first_line_chars,
 794            last_line_chars,
 795            longest_row,
 796            longest_row_chars,
 797        }
 798    }
 799}
 800
 801impl sum_tree::Summary for TextSummary {
 802    type Context = ();
 803
 804    fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
 805        *self += summary;
 806    }
 807}
 808
 809impl<'a> std::ops::Add<Self> for TextSummary {
 810    type Output = Self;
 811
 812    fn add(mut self, rhs: Self) -> Self::Output {
 813        self.add_assign(&rhs);
 814        self
 815    }
 816}
 817
 818impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 819    fn add_assign(&mut self, other: &'a Self) {
 820        let joined_chars = self.last_line_chars + other.first_line_chars;
 821        if joined_chars > self.longest_row_chars {
 822            self.longest_row = self.lines.row;
 823            self.longest_row_chars = joined_chars;
 824        }
 825        if other.longest_row_chars > self.longest_row_chars {
 826            self.longest_row = self.lines.row + other.longest_row;
 827            self.longest_row_chars = other.longest_row_chars;
 828        }
 829
 830        if self.lines.row == 0 {
 831            self.first_line_chars += other.first_line_chars;
 832        }
 833
 834        if other.lines.row == 0 {
 835            self.last_line_chars += other.first_line_chars;
 836        } else {
 837            self.last_line_chars = other.last_line_chars;
 838        }
 839
 840        self.bytes += other.bytes;
 841        self.lines += other.lines;
 842        self.lines_utf16 += other.lines_utf16;
 843    }
 844}
 845
 846impl std::ops::AddAssign<Self> for TextSummary {
 847    fn add_assign(&mut self, other: Self) {
 848        *self += &other;
 849    }
 850}
 851
 852pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
 853    fn from_text_summary(summary: &TextSummary) -> Self;
 854    fn add_assign(&mut self, other: &Self);
 855}
 856
 857impl<'a, D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
 858    fn from_text_summary(summary: &TextSummary) -> Self {
 859        (
 860            D1::from_text_summary(summary),
 861            D2::from_text_summary(summary),
 862        )
 863    }
 864
 865    fn add_assign(&mut self, other: &Self) {
 866        self.0.add_assign(&other.0);
 867        self.1.add_assign(&other.1);
 868    }
 869}
 870
 871impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
 872    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 873        *self += &summary.text;
 874    }
 875}
 876
 877impl TextDimension for TextSummary {
 878    fn from_text_summary(summary: &TextSummary) -> Self {
 879        summary.clone()
 880    }
 881
 882    fn add_assign(&mut self, other: &Self) {
 883        *self += other;
 884    }
 885}
 886
 887impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
 888    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 889        *self += summary.text.bytes;
 890    }
 891}
 892
 893impl TextDimension for usize {
 894    fn from_text_summary(summary: &TextSummary) -> Self {
 895        summary.bytes
 896    }
 897
 898    fn add_assign(&mut self, other: &Self) {
 899        *self += other;
 900    }
 901}
 902
 903impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
 904    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 905        *self += summary.text.lines;
 906    }
 907}
 908
 909impl TextDimension for Point {
 910    fn from_text_summary(summary: &TextSummary) -> Self {
 911        summary.lines
 912    }
 913
 914    fn add_assign(&mut self, other: &Self) {
 915        *self += other;
 916    }
 917}
 918
 919impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
 920    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
 921        *self += summary.text.lines_utf16;
 922    }
 923}
 924
 925impl TextDimension for PointUtf16 {
 926    fn from_text_summary(summary: &TextSummary) -> Self {
 927        summary.lines_utf16
 928    }
 929
 930    fn add_assign(&mut self, other: &Self) {
 931        *self += other;
 932    }
 933}
 934
 935fn find_split_ix(text: &str) -> usize {
 936    let mut ix = text.len() / 2;
 937    while !text.is_char_boundary(ix) {
 938        if ix < 2 * CHUNK_BASE {
 939            ix += 1;
 940        } else {
 941            ix = (text.len() / 2) - 1;
 942            break;
 943        }
 944    }
 945    while !text.is_char_boundary(ix) {
 946        ix -= 1;
 947    }
 948
 949    debug_assert!(ix <= 2 * CHUNK_BASE);
 950    debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
 951    ix
 952}
 953
 954#[cfg(test)]
 955mod tests {
 956    use super::*;
 957    use crate::random_char_iter::RandomCharIter;
 958    use rand::prelude::*;
 959    use std::{cmp::Ordering, env, io::Read};
 960    use Bias::{Left, Right};
 961
 962    #[test]
 963    fn test_all_4_byte_chars() {
 964        let mut rope = Rope::new();
 965        let text = "🏀".repeat(256);
 966        rope.push(&text);
 967        assert_eq!(rope.text(), text);
 968    }
 969
 970    #[test]
 971    fn test_clip() {
 972        let rope = Rope::from("🧘");
 973
 974        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
 975        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
 976        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
 977
 978        assert_eq!(
 979            rope.clip_point(Point::new(0, 1), Bias::Left),
 980            Point::new(0, 0)
 981        );
 982        assert_eq!(
 983            rope.clip_point(Point::new(0, 1), Bias::Right),
 984            Point::new(0, 4)
 985        );
 986        assert_eq!(
 987            rope.clip_point(Point::new(0, 5), Bias::Right),
 988            Point::new(0, 4)
 989        );
 990
 991        assert_eq!(
 992            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left),
 993            PointUtf16::new(0, 0)
 994        );
 995        assert_eq!(
 996            rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right),
 997            PointUtf16::new(0, 2)
 998        );
 999        assert_eq!(
1000            rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
1001            PointUtf16::new(0, 2)
1002        );
1003    }
1004
1005    #[gpui::test(iterations = 100)]
1006    fn test_random_rope(mut rng: StdRng) {
1007        let operations = env::var("OPERATIONS")
1008            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1009            .unwrap_or(10);
1010
1011        let mut expected = String::new();
1012        let mut actual = Rope::new();
1013        for _ in 0..operations {
1014            let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1015            let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1016            let len = rng.gen_range(0..=64);
1017            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1018
1019            let mut new_actual = Rope::new();
1020            let mut cursor = actual.cursor(0);
1021            new_actual.append(cursor.slice(start_ix));
1022            new_actual.push(&new_text);
1023            cursor.seek_forward(end_ix);
1024            new_actual.append(cursor.suffix());
1025            actual = new_actual;
1026
1027            expected.replace_range(start_ix..end_ix, &new_text);
1028
1029            assert_eq!(actual.text(), expected);
1030            log::info!("text: {:?}", expected);
1031
1032            for _ in 0..5 {
1033                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1034                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1035
1036                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1037                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1038
1039                let mut actual_text = String::new();
1040                actual
1041                    .bytes_in_range(start_ix..end_ix)
1042                    .read_to_string(&mut actual_text)
1043                    .unwrap();
1044                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1045
1046                assert_eq!(
1047                    actual
1048                        .reversed_chunks_in_range(start_ix..end_ix)
1049                        .collect::<Vec<&str>>()
1050                        .into_iter()
1051                        .rev()
1052                        .collect::<String>(),
1053                    &expected[start_ix..end_ix]
1054                );
1055            }
1056
1057            let mut point = Point::new(0, 0);
1058            let mut point_utf16 = PointUtf16::new(0, 0);
1059            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1060                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1061                assert_eq!(
1062                    actual.offset_to_point_utf16(ix),
1063                    point_utf16,
1064                    "offset_to_point_utf16({})",
1065                    ix
1066                );
1067                assert_eq!(
1068                    actual.point_to_offset(point),
1069                    ix,
1070                    "point_to_offset({:?})",
1071                    point
1072                );
1073                assert_eq!(
1074                    actual.point_utf16_to_offset(point_utf16),
1075                    ix,
1076                    "point_utf16_to_offset({:?})",
1077                    point_utf16
1078                );
1079                if ch == '\n' {
1080                    point += Point::new(1, 0);
1081                    point_utf16 += PointUtf16::new(1, 0);
1082                } else {
1083                    point.column += ch.len_utf8() as u32;
1084                    point_utf16.column += ch.len_utf16() as u32;
1085                }
1086            }
1087
1088            let mut point_utf16 = PointUtf16::zero();
1089            for unit in expected.encode_utf16() {
1090                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1091                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1092                assert!(right_point >= left_point);
1093                // Ensure translating UTF-16 points to offsets doesn't panic.
1094                actual.point_utf16_to_offset(left_point);
1095                actual.point_utf16_to_offset(right_point);
1096
1097                if unit == b'\n' as u16 {
1098                    point_utf16 += PointUtf16::new(1, 0);
1099                } else {
1100                    point_utf16 += PointUtf16::new(0, 1);
1101                }
1102            }
1103
1104            for _ in 0..5 {
1105                let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1106                let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1107                assert_eq!(
1108                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1109                    TextSummary::from(&expected[start_ix..end_ix])
1110                );
1111            }
1112
1113            let mut expected_longest_rows = Vec::new();
1114            let mut longest_line_len = -1_isize;
1115            for (row, line) in expected.split('\n').enumerate() {
1116                let row = row as u32;
1117                assert_eq!(
1118                    actual.line_len(row),
1119                    line.len() as u32,
1120                    "invalid line len for row {}",
1121                    row
1122                );
1123
1124                let line_char_count = line.chars().count() as isize;
1125                match line_char_count.cmp(&longest_line_len) {
1126                    Ordering::Less => {}
1127                    Ordering::Equal => expected_longest_rows.push(row),
1128                    Ordering::Greater => {
1129                        longest_line_len = line_char_count;
1130                        expected_longest_rows.clear();
1131                        expected_longest_rows.push(row);
1132                    }
1133                }
1134            }
1135
1136            let longest_row = actual.summary().longest_row;
1137            assert!(
1138                expected_longest_rows.contains(&longest_row),
1139                "incorrect longest row {}. expected {:?} with length {}",
1140                longest_row,
1141                expected_longest_rows,
1142                longest_line_len,
1143            );
1144        }
1145    }
1146
1147    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1148        while !text.is_char_boundary(offset) {
1149            match bias {
1150                Bias::Left => offset -= 1,
1151                Bias::Right => offset += 1,
1152            }
1153        }
1154        offset
1155    }
1156
1157    impl Rope {
1158        fn text(&self) -> String {
1159            let mut text = String::new();
1160            for chunk in self.chunks.cursor::<()>() {
1161                text.push_str(&chunk.0);
1162            }
1163            text
1164        }
1165    }
1166}