rope.rs

   1mod chunk;
   2mod offset_utf16;
   3mod point;
   4mod point_utf16;
   5mod unclipped;
   6
   7use arrayvec::ArrayVec;
   8use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
   9use regex::Regex;
  10use std::{
  11    borrow::Cow,
  12    cmp, fmt, io, mem,
  13    ops::{self, AddAssign, Range},
  14    str,
  15    sync::{Arc, LazyLock},
  16};
  17use sum_tree::{Bias, Dimension, Dimensions, SumTree};
  18
  19pub use chunk::{Chunk, ChunkSlice};
  20pub use offset_utf16::OffsetUtf16;
  21pub use point::Point;
  22pub use point_utf16::PointUtf16;
  23pub use unclipped::Unclipped;
  24
  25use crate::chunk::Bitmap;
  26
  27static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
  28    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
  29
  30#[derive(Clone, Copy, Debug, PartialEq)]
  31pub enum LineEnding {
  32    Unix,
  33    Windows,
  34}
  35
  36impl Default for LineEnding {
  37    fn default() -> Self {
  38        #[cfg(unix)]
  39        return Self::Unix;
  40
  41        #[cfg(not(unix))]
  42        return Self::Windows;
  43    }
  44}
  45
  46impl LineEnding {
  47    pub fn as_str(&self) -> &'static str {
  48        match self {
  49            LineEnding::Unix => "\n",
  50            LineEnding::Windows => "\r\n",
  51        }
  52    }
  53
  54    pub fn label(&self) -> &'static str {
  55        match self {
  56            LineEnding::Unix => "LF",
  57            LineEnding::Windows => "CRLF",
  58        }
  59    }
  60
  61    pub fn detect(text: &str) -> Self {
  62        let mut max_ix = cmp::min(text.len(), 1000);
  63        while !text.is_char_boundary(max_ix) {
  64            max_ix -= 1;
  65        }
  66
  67        if let Some(ix) = text[..max_ix].find(['\n']) {
  68            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
  69                Self::Windows
  70            } else {
  71                Self::Unix
  72            }
  73        } else {
  74            Self::default()
  75        }
  76    }
  77
  78    pub fn normalize(text: &mut String) {
  79        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
  80            *text = replaced;
  81        }
  82    }
  83
  84    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
  85        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
  86            replaced.into()
  87        } else {
  88            text
  89        }
  90    }
  91
  92    pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
  93        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
  94            replaced.into()
  95        } else {
  96            text
  97        }
  98    }
  99
 100    /// Converts text chunks into a [`String`] using the current line ending.
 101    pub fn into_string(&self, chunks: Chunks<'_>) -> String {
 102        match self {
 103            LineEnding::Unix => chunks.collect(),
 104            LineEnding::Windows => {
 105                let line_ending = self.as_str();
 106                let mut result = String::new();
 107                for chunk in chunks {
 108                    result.push_str(&chunk.replace('\n', line_ending));
 109                }
 110                result
 111            }
 112        }
 113    }
 114}
 115
 116#[derive(Clone, Default)]
 117pub struct Rope {
 118    chunks: SumTree<Chunk>,
 119}
 120
 121impl Rope {
 122    pub fn new() -> Self {
 123        Self::default()
 124    }
 125
 126    /// Checks that `index`-th byte is the first byte in a UTF-8 code point
 127    /// sequence or the end of the string.
 128    ///
 129    /// The start and end of the string (when `index == self.len()`) are
 130    /// considered to be boundaries.
 131    ///
 132    /// Returns `false` if `index` is greater than `self.len()`.
 133    pub fn is_char_boundary(&self, offset: usize) -> bool {
 134        if self.chunks.is_empty() {
 135            return offset == 0;
 136        }
 137        let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
 138        let chunk_offset = offset - start;
 139        item.map(|chunk| chunk.is_char_boundary(chunk_offset))
 140            .unwrap_or(false)
 141    }
 142
 143    #[track_caller]
 144    #[inline(always)]
 145    pub fn assert_char_boundary(&self, offset: usize) {
 146        if self.chunks.is_empty() && offset == 0 {
 147            return;
 148        }
 149        let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
 150        match item {
 151            Some(chunk) => {
 152                let chunk_offset = offset - start;
 153                chunk.assert_char_boundary(chunk_offset);
 154            }
 155            None => {
 156                panic!(
 157                    "byte index {} is out of bounds of rope (length: {})",
 158                    offset,
 159                    self.len()
 160                );
 161            }
 162        }
 163    }
 164
 165    pub fn floor_char_boundary(&self, index: usize) -> usize {
 166        if index >= self.len() {
 167            self.len()
 168        } else {
 169            #[inline]
 170            pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 171                // This is bit magic equivalent to: b < 128 || b >= 192
 172                (u8 as i8) >= -0x40
 173            }
 174
 175            let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
 176            let chunk_offset = index - start;
 177            let lower_idx = item.map(|chunk| {
 178                let lower_bound = chunk_offset.saturating_sub(3);
 179                chunk
 180                    .text
 181                    .as_bytes()
 182                    .get(lower_bound..=chunk_offset)
 183                    .map(|it| {
 184                        let new_idx = it
 185                            .iter()
 186                            .rposition(|&b| is_utf8_char_boundary(b))
 187                            .unwrap_or(0);
 188                        lower_bound + new_idx
 189                    })
 190                    .unwrap_or(chunk.text.len())
 191            });
 192            lower_idx.map_or_else(|| self.len(), |idx| start + idx)
 193        }
 194    }
 195
 196    pub fn ceil_char_boundary(&self, index: usize) -> usize {
 197        if index > self.len() {
 198            self.len()
 199        } else {
 200            #[inline]
 201            pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 202                // This is bit magic equivalent to: b < 128 || b >= 192
 203                (u8 as i8) >= -0x40
 204            }
 205
 206            let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
 207            let chunk_offset = index - start;
 208            let upper_idx = item.map(|chunk| {
 209                let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
 210                chunk.text.as_bytes()[chunk_offset..upper_bound]
 211                    .iter()
 212                    .position(|&b| is_utf8_char_boundary(b))
 213                    .map_or(upper_bound, |pos| pos + chunk_offset)
 214            });
 215
 216            upper_idx.map_or_else(|| self.len(), |idx| start + idx)
 217        }
 218    }
 219
 220    pub fn append(&mut self, rope: Rope) {
 221        if let Some(chunk) = rope.chunks.first()
 222            && (self
 223                .chunks
 224                .last()
 225                .is_some_and(|c| c.text.len() < chunk::MIN_BASE)
 226                || chunk.text.len() < chunk::MIN_BASE)
 227        {
 228            self.push_chunk(chunk.as_slice());
 229
 230            let mut chunks = rope.chunks.cursor::<()>(());
 231            chunks.next();
 232            chunks.next();
 233            self.chunks.append(chunks.suffix(), ());
 234        } else {
 235            self.chunks.append(rope.chunks, ());
 236        }
 237        self.check_invariants();
 238    }
 239
 240    pub fn replace(&mut self, range: Range<usize>, text: &str) {
 241        let mut new_rope = Rope::new();
 242        let mut cursor = self.cursor(0);
 243        new_rope.append(cursor.slice(range.start));
 244        cursor.seek_forward(range.end);
 245        new_rope.push(text);
 246        new_rope.append(cursor.suffix());
 247        *self = new_rope;
 248    }
 249
 250    pub fn slice(&self, range: Range<usize>) -> Rope {
 251        let mut cursor = self.cursor(0);
 252        cursor.seek_forward(range.start);
 253        cursor.slice(range.end)
 254    }
 255
 256    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
 257        // This would be more efficient with a forward advance after the first, but it's fine.
 258        let start = self.point_to_offset(Point::new(range.start, 0));
 259        let end = self.point_to_offset(Point::new(range.end, 0));
 260        self.slice(start..end)
 261    }
 262
 263    pub fn push(&mut self, mut text: &str) {
 264        self.chunks.update_last(
 265            |last_chunk| {
 266                let split_ix = if last_chunk.text.len() + text.len() <= chunk::MAX_BASE {
 267                    text.len()
 268                } else {
 269                    let mut split_ix = cmp::min(
 270                        chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
 271                        text.len(),
 272                    );
 273                    while !text.is_char_boundary(split_ix) {
 274                        split_ix += 1;
 275                    }
 276                    split_ix
 277                };
 278
 279                let (suffix, remainder) = text.split_at(split_ix);
 280                last_chunk.push_str(suffix);
 281                text = remainder;
 282            },
 283            (),
 284        );
 285
 286        #[cfg(not(test))]
 287        const NUM_CHUNKS: usize = 16;
 288        #[cfg(test)]
 289        const NUM_CHUNKS: usize = 4;
 290
 291        // We accommodate for NUM_CHUNKS chunks of size MAX_BASE
 292        // but given the chunk boundary can land within a character
 293        // we need to accommodate for the worst case where every chunk gets cut short by up to 4 bytes
 294        if text.len() > NUM_CHUNKS * chunk::MAX_BASE - NUM_CHUNKS * 4 {
 295            return self.push_large(text);
 296        }
 297        // 16 is enough as otherwise we will hit the branch above
 298        let mut new_chunks = ArrayVec::<_, NUM_CHUNKS>::new();
 299
 300        while !text.is_empty() {
 301            let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
 302            while !text.is_char_boundary(split_ix) {
 303                split_ix -= 1;
 304            }
 305            let (chunk, remainder) = text.split_at(split_ix);
 306            new_chunks.push(chunk);
 307            text = remainder;
 308        }
 309        self.chunks
 310            .extend(new_chunks.into_iter().map(Chunk::new), ());
 311
 312        self.check_invariants();
 313    }
 314
 315    /// A copy of `push` specialized for working with large quantities of text.
 316    fn push_large(&mut self, mut text: &str) {
 317        // To avoid frequent reallocs when loading large swaths of file contents,
 318        // we estimate worst-case `new_chunks` capacity;
 319        // Chunk is a fixed-capacity buffer. If a character falls on
 320        // chunk boundary, we push it off to the following chunk (thus leaving a small bit of capacity unfilled in current chunk).
 321        // Worst-case chunk count when loading a file is then a case where every chunk ends up with that unused capacity.
 322        // Since we're working with UTF-8, each character is at most 4 bytes wide. It follows then that the worst case is where
 323        // a chunk ends with 3 bytes of a 4-byte character. These 3 bytes end up being stored in the following chunk, thus wasting
 324        // 3 bytes of storage in current chunk.
 325        // For example, a 1024-byte string can occupy between 32 (full ASCII, 1024/32) and 36 (full 4-byte UTF-8, 1024 / 29 rounded up) chunks.
 326        const MIN_CHUNK_SIZE: usize = chunk::MAX_BASE - 3;
 327
 328        // We also round up the capacity up by one, for a good measure; we *really* don't want to realloc here, as we assume that the # of characters
 329        // we're working with there is large.
 330        let capacity = text.len().div_ceil(MIN_CHUNK_SIZE);
 331        let mut new_chunks = Vec::with_capacity(capacity);
 332
 333        while !text.is_empty() {
 334            let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
 335            while !text.is_char_boundary(split_ix) {
 336                split_ix -= 1;
 337            }
 338            let (chunk, remainder) = text.split_at(split_ix);
 339            new_chunks.push(chunk);
 340            text = remainder;
 341        }
 342
 343        #[cfg(test)]
 344        const PARALLEL_THRESHOLD: usize = 4;
 345        #[cfg(not(test))]
 346        const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
 347
 348        if new_chunks.len() >= PARALLEL_THRESHOLD {
 349            self.chunks
 350                .par_extend(new_chunks.into_par_iter().map(Chunk::new), ());
 351        } else {
 352            self.chunks
 353                .extend(new_chunks.into_iter().map(Chunk::new), ());
 354        }
 355
 356        self.check_invariants();
 357    }
 358
 359    fn push_chunk(&mut self, mut chunk: ChunkSlice) {
 360        self.chunks.update_last(
 361            |last_chunk| {
 362                let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
 363                    chunk.len()
 364                } else {
 365                    let mut split_ix = cmp::min(
 366                        chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
 367                        chunk.len(),
 368                    );
 369                    while !chunk.is_char_boundary(split_ix) {
 370                        split_ix += 1;
 371                    }
 372                    split_ix
 373                };
 374
 375                let (suffix, remainder) = chunk.split_at(split_ix);
 376                last_chunk.append(suffix);
 377                chunk = remainder;
 378            },
 379            (),
 380        );
 381
 382        if !chunk.is_empty() {
 383            self.chunks.push(chunk.into(), ());
 384        }
 385    }
 386
 387    pub fn push_front(&mut self, text: &str) {
 388        let suffix = mem::replace(self, Rope::from(text));
 389        self.append(suffix);
 390    }
 391
 392    fn check_invariants(&self) {
 393        #[cfg(test)]
 394        {
 395            // Ensure all chunks except maybe the last one are not underflowing.
 396            // Allow some wiggle room for multibyte characters at chunk boundaries.
 397            let mut chunks = self.chunks.cursor::<()>(()).peekable();
 398            while let Some(chunk) = chunks.next() {
 399                if chunks.peek().is_some() {
 400                    assert!(chunk.text.len() + 3 >= chunk::MIN_BASE);
 401                }
 402            }
 403        }
 404    }
 405
 406    pub fn summary(&self) -> TextSummary {
 407        self.chunks.summary().text
 408    }
 409
 410    pub fn len(&self) -> usize {
 411        self.chunks.extent(())
 412    }
 413
 414    pub fn is_empty(&self) -> bool {
 415        self.len() == 0
 416    }
 417
 418    pub fn max_point(&self) -> Point {
 419        self.chunks.extent(())
 420    }
 421
 422    pub fn max_point_utf16(&self) -> PointUtf16 {
 423        self.chunks.extent(())
 424    }
 425
 426    pub fn cursor(&self, offset: usize) -> Cursor<'_> {
 427        Cursor::new(self, offset)
 428    }
 429
 430    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 431        self.chars_at(0)
 432    }
 433
 434    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 435        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 436    }
 437
 438    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 439        self.reversed_chunks_in_range(0..start)
 440            .flat_map(|chunk| chunk.chars().rev())
 441    }
 442
 443    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
 444        Bytes::new(self, range, false)
 445    }
 446
 447    pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
 448        Bytes::new(self, range, true)
 449    }
 450
 451    pub fn chunks(&self) -> Chunks<'_> {
 452        self.chunks_in_range(0..self.len())
 453    }
 454
 455    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
 456        Chunks::new(self, range, false)
 457    }
 458
 459    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
 460        Chunks::new(self, range, true)
 461    }
 462
 463    /// Formats the rope's text with the specified line ending string.
 464    /// This replaces all `\n` characters with the provided line ending.
 465    ///
 466    /// The rope internally stores all line breaks as `\n` (see `Display` impl).
 467    /// Use this method to convert to different line endings for file operations,
 468    /// LSP communication, or other scenarios requiring specific line ending formats.
 469    pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
 470        line_ending.into_string(self.chunks())
 471    }
 472
 473    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 474        if offset >= self.summary().len {
 475            return self.summary().len_utf16;
 476        }
 477        let (start, _, item) =
 478            self.chunks
 479                .find::<Dimensions<usize, OffsetUtf16>, _>((), &offset, Bias::Left);
 480        let overshoot = offset - start.0;
 481        start.1
 482            + item.map_or(Default::default(), |chunk| {
 483                chunk.as_slice().offset_to_offset_utf16(overshoot)
 484            })
 485    }
 486
 487    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 488        if offset >= self.summary().len_utf16 {
 489            return self.summary().len;
 490        }
 491        let (start, _, item) =
 492            self.chunks
 493                .find::<Dimensions<OffsetUtf16, usize>, _>((), &offset, Bias::Left);
 494        let overshoot = offset - start.0;
 495        start.1
 496            + item.map_or(Default::default(), |chunk| {
 497                chunk.as_slice().offset_utf16_to_offset(overshoot)
 498            })
 499    }
 500
 501    pub fn offset_to_point(&self, offset: usize) -> Point {
 502        if offset >= self.summary().len {
 503            return self.summary().lines;
 504        }
 505        let (start, _, item) =
 506            self.chunks
 507                .find::<Dimensions<usize, Point>, _>((), &offset, Bias::Left);
 508        let overshoot = offset - start.0;
 509        start.1
 510            + item.map_or(Point::zero(), |chunk| {
 511                chunk.as_slice().offset_to_point(overshoot)
 512            })
 513    }
 514
 515    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 516        if offset >= self.summary().len {
 517            return self.summary().lines_utf16();
 518        }
 519        let (start, _, item) =
 520            self.chunks
 521                .find::<Dimensions<usize, PointUtf16>, _>((), &offset, Bias::Left);
 522        let overshoot = offset - start.0;
 523        start.1
 524            + item.map_or(PointUtf16::zero(), |chunk| {
 525                chunk.as_slice().offset_to_point_utf16(overshoot)
 526            })
 527    }
 528
 529    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 530        if point >= self.summary().lines {
 531            return self.summary().lines_utf16();
 532        }
 533        let (start, _, item) =
 534            self.chunks
 535                .find::<Dimensions<Point, PointUtf16>, _>((), &point, Bias::Left);
 536        let overshoot = point - start.0;
 537        start.1
 538            + item.map_or(PointUtf16::zero(), |chunk| {
 539                chunk.as_slice().point_to_point_utf16(overshoot)
 540            })
 541    }
 542
 543    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 544        if point >= self.summary().lines_utf16() {
 545            return self.summary().lines;
 546        }
 547        let mut cursor = self.chunks.cursor::<Dimensions<PointUtf16, Point>>(());
 548        cursor.seek(&point, Bias::Left);
 549        let overshoot = point - cursor.start().0;
 550        cursor.start().1
 551            + cursor.item().map_or(Point::zero(), |chunk| {
 552                chunk
 553                    .as_slice()
 554                    .offset_to_point(chunk.as_slice().point_utf16_to_offset(overshoot, false))
 555            })
 556    }
 557
 558    pub fn point_to_offset(&self, point: Point) -> usize {
 559        if point >= self.summary().lines {
 560            return self.summary().len;
 561        }
 562        let (start, _, item) =
 563            self.chunks
 564                .find::<Dimensions<Point, usize>, _>((), &point, Bias::Left);
 565        let overshoot = point - start.0;
 566        start.1 + item.map_or(0, |chunk| chunk.as_slice().point_to_offset(overshoot))
 567    }
 568
 569    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 570        if point >= self.summary().lines {
 571            return self.summary().len_utf16;
 572        }
 573        let mut cursor = self.chunks.cursor::<Dimensions<Point, OffsetUtf16>>(());
 574        cursor.seek(&point, Bias::Left);
 575        let overshoot = point - cursor.start().0;
 576        cursor.start().1
 577            + cursor.item().map_or(OffsetUtf16(0), |chunk| {
 578                chunk.as_slice().point_to_offset_utf16(overshoot)
 579            })
 580    }
 581
 582    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 583        self.point_utf16_to_offset_impl(point, false)
 584    }
 585
 586    pub fn point_utf16_to_offset_utf16(&self, point: PointUtf16) -> OffsetUtf16 {
 587        self.point_utf16_to_offset_utf16_impl(point, false)
 588    }
 589
 590    pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
 591        self.point_utf16_to_offset_impl(point.0, true)
 592    }
 593
 594    fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
 595        if point >= self.summary().lines_utf16() {
 596            return self.summary().len;
 597        }
 598        let (start, _, item) =
 599            self.chunks
 600                .find::<Dimensions<PointUtf16, usize>, _>((), &point, Bias::Left);
 601        let overshoot = point - start.0;
 602        start.1
 603            + item.map_or(0, |chunk| {
 604                chunk.as_slice().point_utf16_to_offset(overshoot, clip)
 605            })
 606    }
 607
 608    fn point_utf16_to_offset_utf16_impl(&self, point: PointUtf16, clip: bool) -> OffsetUtf16 {
 609        if point >= self.summary().lines_utf16() {
 610            return self.summary().len_utf16;
 611        }
 612        let mut cursor = self
 613            .chunks
 614            .cursor::<Dimensions<PointUtf16, OffsetUtf16>>(());
 615        cursor.seek(&point, Bias::Left);
 616        let overshoot = point - cursor.start().0;
 617        cursor.start().1
 618            + cursor.item().map_or(OffsetUtf16(0), |chunk| {
 619                chunk
 620                    .as_slice()
 621                    .offset_to_offset_utf16(chunk.as_slice().point_utf16_to_offset(overshoot, clip))
 622            })
 623    }
 624
 625    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 626        if point.0 >= self.summary().lines_utf16() {
 627            return self.summary().lines;
 628        }
 629        let (start, _, item) =
 630            self.chunks
 631                .find::<Dimensions<PointUtf16, Point>, _>((), &point.0, Bias::Left);
 632        let overshoot = Unclipped(point.0 - start.0);
 633        start.1
 634            + item.map_or(Point::zero(), |chunk| {
 635                chunk.as_slice().unclipped_point_utf16_to_point(overshoot)
 636            })
 637    }
 638
 639    pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
 640        match bias {
 641            Bias::Left => self.floor_char_boundary(offset),
 642            Bias::Right => self.ceil_char_boundary(offset),
 643        }
 644    }
 645
 646    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 647        let (start, _, item) = self.chunks.find::<OffsetUtf16, _>((), &offset, Bias::Right);
 648        if let Some(chunk) = item {
 649            let overshoot = offset - start;
 650            start + chunk.as_slice().clip_offset_utf16(overshoot, bias)
 651        } else {
 652            self.summary().len_utf16
 653        }
 654    }
 655
 656    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 657        let (start, _, item) = self.chunks.find::<Point, _>((), &point, Bias::Right);
 658        if let Some(chunk) = item {
 659            let overshoot = point - start;
 660            start + chunk.as_slice().clip_point(overshoot, bias)
 661        } else {
 662            self.summary().lines
 663        }
 664    }
 665
 666    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 667        let (start, _, item) = self.chunks.find::<PointUtf16, _>((), &point.0, Bias::Right);
 668        if let Some(chunk) = item {
 669            let overshoot = Unclipped(point.0 - start);
 670            start + chunk.as_slice().clip_point_utf16(overshoot, bias)
 671        } else {
 672            self.summary().lines_utf16()
 673        }
 674    }
 675
 676    pub fn line_len(&self, row: u32) -> u32 {
 677        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 678            .column
 679    }
 680}
 681
 682impl<'a> From<&'a str> for Rope {
 683    fn from(text: &'a str) -> Self {
 684        let mut rope = Self::new();
 685        rope.push(text);
 686        rope
 687    }
 688}
 689
 690impl<'a> FromIterator<&'a str> for Rope {
 691    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
 692        let mut rope = Rope::new();
 693        for chunk in iter {
 694            rope.push(chunk);
 695        }
 696        rope
 697    }
 698}
 699
 700impl From<String> for Rope {
 701    #[inline(always)]
 702    fn from(text: String) -> Self {
 703        Rope::from(text.as_str())
 704    }
 705}
 706
 707impl From<&String> for Rope {
 708    #[inline(always)]
 709    fn from(text: &String) -> Self {
 710        Rope::from(text.as_str())
 711    }
 712}
 713
 714/// Display implementation for Rope.
 715///
 716/// Note: This always uses `\n` as the line separator, regardless of the original
 717/// file's line endings. The rope internally normalizes all line breaks to `\n`.
 718/// If you need to preserve original line endings (e.g., for LSP communication),
 719/// use `to_string_with_line_ending` instead.
 720impl fmt::Display for Rope {
 721    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 722        for chunk in self.chunks() {
 723            write!(f, "{chunk}")?;
 724        }
 725        Ok(())
 726    }
 727}
 728
 729impl fmt::Debug for Rope {
 730    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 731        use std::fmt::Write as _;
 732
 733        write!(f, "\"")?;
 734        let mut format_string = String::new();
 735        for chunk in self.chunks() {
 736            write!(&mut format_string, "{:?}", chunk)?;
 737            write!(f, "{}", &format_string[1..format_string.len() - 1])?;
 738            format_string.clear();
 739        }
 740        write!(f, "\"")?;
 741        Ok(())
 742    }
 743}
 744
 745pub struct Cursor<'a> {
 746    rope: &'a Rope,
 747    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
 748    offset: usize,
 749}
 750
 751impl<'a> Cursor<'a> {
 752    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 753        let mut chunks = rope.chunks.cursor(());
 754        chunks.seek(&offset, Bias::Right);
 755        Self {
 756            rope,
 757            chunks,
 758            offset,
 759        }
 760    }
 761
 762    pub fn seek_forward(&mut self, end_offset: usize) {
 763        debug_assert!(end_offset >= self.offset);
 764
 765        self.chunks.seek_forward(&end_offset, Bias::Right);
 766        self.offset = end_offset;
 767    }
 768
 769    pub fn slice(&mut self, end_offset: usize) -> Rope {
 770        debug_assert!(
 771            end_offset >= self.offset,
 772            "cannot slice backwards from {} to {}",
 773            self.offset,
 774            end_offset
 775        );
 776
 777        let mut slice = Rope::new();
 778        if let Some(start_chunk) = self.chunks.item() {
 779            let start_ix = self.offset - self.chunks.start();
 780            let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
 781            slice.push_chunk(start_chunk.slice(start_ix..end_ix));
 782        }
 783
 784        if end_offset > self.chunks.end() {
 785            self.chunks.next();
 786            slice.append(Rope {
 787                chunks: self.chunks.slice(&end_offset, Bias::Right),
 788            });
 789            if let Some(end_chunk) = self.chunks.item() {
 790                let end_ix = end_offset - self.chunks.start();
 791                slice.push_chunk(end_chunk.slice(0..end_ix));
 792            }
 793        }
 794
 795        self.offset = end_offset;
 796        slice
 797    }
 798
 799    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 800        debug_assert!(end_offset >= self.offset);
 801
 802        let mut summary = D::zero(());
 803        if let Some(start_chunk) = self.chunks.item() {
 804            let start_ix = self.offset - self.chunks.start();
 805            let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
 806            summary.add_assign(&D::from_chunk(start_chunk.slice(start_ix..end_ix)));
 807        }
 808
 809        if end_offset > self.chunks.end() {
 810            self.chunks.next();
 811            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right));
 812            if let Some(end_chunk) = self.chunks.item() {
 813                let end_ix = end_offset - self.chunks.start();
 814                summary.add_assign(&D::from_chunk(end_chunk.slice(0..end_ix)));
 815            }
 816        }
 817
 818        self.offset = end_offset;
 819        summary
 820    }
 821
 822    pub fn suffix(mut self) -> Rope {
 823        self.slice(self.rope.chunks.extent(()))
 824    }
 825
 826    pub fn offset(&self) -> usize {
 827        self.offset
 828    }
 829}
 830
 831pub struct ChunkBitmaps<'a> {
 832    /// A slice of text up to 128 bytes in size
 833    pub text: &'a str,
 834    /// Bitmap of character locations in text. LSB ordered
 835    pub chars: Bitmap,
 836    /// Bitmap of tab locations in text. LSB ordered
 837    pub tabs: Bitmap,
 838}
 839
 840#[derive(Clone)]
 841pub struct Chunks<'a> {
 842    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
 843    range: Range<usize>,
 844    offset: usize,
 845    reversed: bool,
 846}
 847
 848impl<'a> Chunks<'a> {
 849    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 850        let mut chunks = rope.chunks.cursor(());
 851        let offset = if reversed {
 852            chunks.seek(&range.end, Bias::Left);
 853            range.end
 854        } else {
 855            chunks.seek(&range.start, Bias::Right);
 856            range.start
 857        };
 858        let chunk_offset = offset - chunks.start();
 859        if let Some(chunk) = chunks.item()
 860            && !chunk.text.is_char_boundary(chunk_offset)
 861        {
 862            panic!("byte index {} is not a char boundary", offset);
 863        }
 864        Self {
 865            chunks,
 866            range,
 867            offset,
 868            reversed,
 869        }
 870    }
 871
 872    fn offset_is_valid(&self) -> bool {
 873        if self.reversed {
 874            if self.offset <= self.range.start || self.offset > self.range.end {
 875                return false;
 876            }
 877        } else if self.offset < self.range.start || self.offset >= self.range.end {
 878            return false;
 879        }
 880
 881        true
 882    }
 883
 884    pub fn offset(&self) -> usize {
 885        self.offset
 886    }
 887
 888    pub fn seek(&mut self, mut offset: usize) {
 889        offset = offset.clamp(self.range.start, self.range.end);
 890
 891        if self.reversed {
 892            if offset > self.chunks.end() {
 893                self.chunks.seek_forward(&offset, Bias::Left);
 894            } else if offset <= *self.chunks.start() {
 895                self.chunks.seek(&offset, Bias::Left);
 896            }
 897        } else {
 898            if offset >= self.chunks.end() {
 899                self.chunks.seek_forward(&offset, Bias::Right);
 900            } else if offset < *self.chunks.start() {
 901                self.chunks.seek(&offset, Bias::Right);
 902            }
 903        };
 904
 905        self.offset = offset;
 906    }
 907
 908    pub fn set_range(&mut self, range: Range<usize>) {
 909        self.range = range.clone();
 910        self.seek(range.start);
 911    }
 912
 913    /// Moves this cursor to the start of the next line in the rope.
 914    ///
 915    /// This method advances the cursor to the beginning of the next line.
 916    /// If the cursor is already at the end of the rope, this method does nothing.
 917    /// Reversed chunks iterators are not currently supported and will panic.
 918    ///
 919    /// Returns `true` if the cursor was successfully moved to the next line start,
 920    /// or `false` if the cursor was already at the end of the rope.
 921    pub fn next_line(&mut self) -> bool {
 922        assert!(!self.reversed);
 923
 924        let mut found = false;
 925        if let Some(chunk) = self.peek() {
 926            if let Some(newline_ix) = chunk.find('\n') {
 927                self.offset += newline_ix + 1;
 928                found = self.offset <= self.range.end;
 929            } else {
 930                self.chunks
 931                    .search_forward(|summary| summary.text.lines.row > 0);
 932                self.offset = *self.chunks.start();
 933
 934                if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
 935                    self.offset += newline_ix + 1;
 936                    found = self.offset <= self.range.end;
 937                } else {
 938                    self.offset = self.chunks.end();
 939                }
 940            }
 941
 942            if self.offset == self.chunks.end() {
 943                self.next();
 944            }
 945        }
 946
 947        if self.offset > self.range.end {
 948            self.offset = cmp::min(self.offset, self.range.end);
 949            self.chunks.seek(&self.offset, Bias::Right);
 950        }
 951
 952        found
 953    }
 954
 955    /// Move this cursor to the preceding position in the rope that starts a new line.
 956    /// Reversed chunks iterators are not currently supported and will panic.
 957    ///
 958    /// If this cursor is not on the start of a line, it will be moved to the start of
 959    /// its current line. Otherwise it will be moved to the start of the previous line.
 960    /// It updates the cursor's position and returns true if a previous line was found,
 961    /// or false if the cursor was already at the start of the rope.
 962    pub fn prev_line(&mut self) -> bool {
 963        assert!(!self.reversed);
 964
 965        let initial_offset = self.offset;
 966
 967        if self.offset == *self.chunks.start() {
 968            self.chunks.prev();
 969        }
 970
 971        if let Some(chunk) = self.chunks.item() {
 972            let mut end_ix = self.offset - *self.chunks.start();
 973            if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
 974                end_ix -= 1;
 975            }
 976
 977            if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
 978                self.offset = *self.chunks.start() + newline_ix + 1;
 979                if self.offset_is_valid() {
 980                    return true;
 981                }
 982            }
 983        }
 984
 985        self.chunks
 986            .search_backward(|summary| summary.text.lines.row > 0);
 987        self.offset = *self.chunks.start();
 988        if let Some(chunk) = self.chunks.item()
 989            && let Some(newline_ix) = chunk.text.rfind('\n')
 990        {
 991            self.offset += newline_ix + 1;
 992            if self.offset_is_valid() {
 993                if self.offset == self.chunks.end() {
 994                    self.chunks.next();
 995                }
 996
 997                return true;
 998            }
 999        }
1000
1001        if !self.offset_is_valid() || self.chunks.item().is_none() {
1002            self.offset = self.range.start;
1003            self.chunks.seek(&self.offset, Bias::Right);
1004        }
1005
1006        self.offset < initial_offset && self.offset == 0
1007    }
1008
1009    pub fn peek(&self) -> Option<&'a str> {
1010        if !self.offset_is_valid() {
1011            return None;
1012        }
1013
1014        let chunk = self.chunks.item()?;
1015        let chunk_start = *self.chunks.start();
1016        let slice_range = if self.reversed {
1017            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1018            let slice_end = self.offset - chunk_start;
1019            slice_start..slice_end
1020        } else {
1021            let slice_start = self.offset - chunk_start;
1022            let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1023            slice_start..slice_end
1024        };
1025
1026        Some(&chunk.text[slice_range])
1027    }
1028
1029    /// Returns bitmaps that represent character positions and tab positions
1030    pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
1031        if !self.offset_is_valid() {
1032            return None;
1033        }
1034
1035        let chunk = self.chunks.item()?;
1036        let chunk_start = *self.chunks.start();
1037        let slice_range = if self.reversed {
1038            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1039            let slice_end = self.offset - chunk_start;
1040            slice_start..slice_end
1041        } else {
1042            let slice_start = self.offset - chunk_start;
1043            let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1044            slice_start..slice_end
1045        };
1046        let chunk_start_offset = slice_range.start;
1047        let slice_text = &chunk.text[slice_range];
1048
1049        // Shift the tabs to align with our slice window
1050        let shifted_tabs = chunk.tabs() >> chunk_start_offset;
1051        let shifted_chars = chunk.chars() >> chunk_start_offset;
1052
1053        Some(ChunkBitmaps {
1054            text: slice_text,
1055            chars: shifted_chars,
1056            tabs: shifted_tabs,
1057        })
1058    }
1059
1060    pub fn lines(self) -> Lines<'a> {
1061        let reversed = self.reversed;
1062        Lines {
1063            chunks: self,
1064            current_line: String::new(),
1065            done: false,
1066            reversed,
1067        }
1068    }
1069
1070    pub fn equals_str(&self, other: &str) -> bool {
1071        let chunk = self.clone();
1072        if chunk.reversed {
1073            let mut offset = other.len();
1074            for chunk in chunk {
1075                if other[0..offset].ends_with(chunk) {
1076                    offset -= chunk.len();
1077                } else {
1078                    return false;
1079                }
1080            }
1081            if offset != 0 {
1082                return false;
1083            }
1084        } else {
1085            let mut offset = 0;
1086            for chunk in chunk {
1087                if offset >= other.len() {
1088                    return false;
1089                }
1090                if other[offset..].starts_with(chunk) {
1091                    offset += chunk.len();
1092                } else {
1093                    return false;
1094                }
1095            }
1096            if offset != other.len() {
1097                return false;
1098            }
1099        }
1100
1101        true
1102    }
1103}
1104
1105pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>);
1106
1107impl<'a> Iterator for ChunkWithBitmaps<'a> {
1108    /// text, chars bitmap, tabs bitmap
1109    type Item = ChunkBitmaps<'a>;
1110
1111    fn next(&mut self) -> Option<Self::Item> {
1112        let chunk_bitmaps = self.0.peek_with_bitmaps()?;
1113        if self.0.reversed {
1114            self.0.offset -= chunk_bitmaps.text.len();
1115            if self.0.offset <= *self.0.chunks.start() {
1116                self.0.chunks.prev();
1117            }
1118        } else {
1119            self.0.offset += chunk_bitmaps.text.len();
1120            if self.0.offset >= self.0.chunks.end() {
1121                self.0.chunks.next();
1122            }
1123        }
1124
1125        Some(chunk_bitmaps)
1126    }
1127}
1128
1129impl<'a> Iterator for Chunks<'a> {
1130    type Item = &'a str;
1131
1132    fn next(&mut self) -> Option<Self::Item> {
1133        let chunk = self.peek()?;
1134        if self.reversed {
1135            self.offset -= chunk.len();
1136            if self.offset <= *self.chunks.start() {
1137                self.chunks.prev();
1138            }
1139        } else {
1140            self.offset += chunk.len();
1141            if self.offset >= self.chunks.end() {
1142                self.chunks.next();
1143            }
1144        }
1145
1146        Some(chunk)
1147    }
1148}
1149
1150pub struct Bytes<'a> {
1151    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
1152    range: Range<usize>,
1153    reversed: bool,
1154}
1155
1156impl<'a> Bytes<'a> {
1157    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
1158        let mut chunks = rope.chunks.cursor(());
1159        if reversed {
1160            chunks.seek(&range.end, Bias::Left);
1161        } else {
1162            chunks.seek(&range.start, Bias::Right);
1163        }
1164        Self {
1165            chunks,
1166            range,
1167            reversed,
1168        }
1169    }
1170
1171    pub fn peek(&self) -> Option<&'a [u8]> {
1172        let chunk = self.chunks.item()?;
1173        if self.reversed && self.range.start >= self.chunks.end() {
1174            return None;
1175        }
1176        let chunk_start = *self.chunks.start();
1177        if self.range.end <= chunk_start {
1178            return None;
1179        }
1180        let start = self.range.start.saturating_sub(chunk_start);
1181        let end = self.range.end - chunk_start;
1182        Some(&chunk.text.as_bytes()[start..chunk.text.len().min(end)])
1183    }
1184}
1185
1186impl<'a> Iterator for Bytes<'a> {
1187    type Item = &'a [u8];
1188
1189    fn next(&mut self) -> Option<Self::Item> {
1190        let result = self.peek();
1191        if result.is_some() {
1192            if self.reversed {
1193                self.chunks.prev();
1194            } else {
1195                self.chunks.next();
1196            }
1197        }
1198        result
1199    }
1200}
1201
1202impl io::Read for Bytes<'_> {
1203    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1204        if let Some(chunk) = self.peek() {
1205            let len = cmp::min(buf.len(), chunk.len());
1206            if self.reversed {
1207                buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
1208                buf[..len].reverse();
1209                self.range.end -= len;
1210            } else {
1211                buf[..len].copy_from_slice(&chunk[..len]);
1212                self.range.start += len;
1213            }
1214
1215            if len == chunk.len() {
1216                if self.reversed {
1217                    self.chunks.prev();
1218                } else {
1219                    self.chunks.next();
1220                }
1221            }
1222            Ok(len)
1223        } else {
1224            Ok(0)
1225        }
1226    }
1227}
1228
1229pub struct Lines<'a> {
1230    chunks: Chunks<'a>,
1231    current_line: String,
1232    done: bool,
1233    reversed: bool,
1234}
1235
1236impl<'a> Lines<'a> {
1237    pub fn next(&mut self) -> Option<&str> {
1238        if self.done {
1239            return None;
1240        }
1241
1242        self.current_line.clear();
1243
1244        while let Some(chunk) = self.chunks.peek() {
1245            let chunk_lines = chunk.split('\n');
1246            if self.reversed {
1247                let mut chunk_lines = chunk_lines.rev().peekable();
1248                if let Some(chunk_line) = chunk_lines.next() {
1249                    let done = chunk_lines.peek().is_some();
1250                    if done {
1251                        self.chunks
1252                            .seek(self.chunks.offset() - chunk_line.len() - "\n".len());
1253                        if self.current_line.is_empty() {
1254                            return Some(chunk_line);
1255                        }
1256                    }
1257                    self.current_line.insert_str(0, chunk_line);
1258                    if done {
1259                        return Some(&self.current_line);
1260                    }
1261                }
1262            } else {
1263                let mut chunk_lines = chunk_lines.peekable();
1264                if let Some(chunk_line) = chunk_lines.next() {
1265                    let done = chunk_lines.peek().is_some();
1266                    if done {
1267                        self.chunks
1268                            .seek(self.chunks.offset() + chunk_line.len() + "\n".len());
1269                        if self.current_line.is_empty() {
1270                            return Some(chunk_line);
1271                        }
1272                    }
1273                    self.current_line.push_str(chunk_line);
1274                    if done {
1275                        return Some(&self.current_line);
1276                    }
1277                }
1278            }
1279
1280            self.chunks.next();
1281        }
1282
1283        self.done = true;
1284        Some(&self.current_line)
1285    }
1286
1287    pub fn seek(&mut self, offset: usize) {
1288        self.chunks.seek(offset);
1289        self.current_line.clear();
1290        self.done = false;
1291    }
1292
1293    pub fn offset(&self) -> usize {
1294        self.chunks.offset()
1295    }
1296}
1297
1298impl sum_tree::Item for Chunk {
1299    type Summary = ChunkSummary;
1300
1301    fn summary(&self, _cx: ()) -> Self::Summary {
1302        ChunkSummary {
1303            text: self.as_slice().text_summary(),
1304        }
1305    }
1306}
1307
1308#[derive(Clone, Debug, Default, Eq, PartialEq)]
1309pub struct ChunkSummary {
1310    text: TextSummary,
1311}
1312
1313impl sum_tree::ContextLessSummary for ChunkSummary {
1314    fn zero() -> Self {
1315        Default::default()
1316    }
1317
1318    fn add_summary(&mut self, summary: &Self) {
1319        self.text += &summary.text;
1320    }
1321}
1322
1323/// Summary of a string of text.
1324#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
1325pub struct TextSummary {
1326    /// Length in bytes.
1327    pub len: usize,
1328    /// Length in UTF-8.
1329    pub chars: usize,
1330    /// Length in UTF-16 code units
1331    pub len_utf16: OffsetUtf16,
1332    /// A point representing the number of lines and the length of the last line.
1333    ///
1334    /// In other words, it marks the point after the last byte in the text, (if
1335    /// EOF was a character, this would be its position).
1336    pub lines: Point,
1337    /// How many `char`s are in the first line
1338    pub first_line_chars: u32,
1339    /// How many `char`s are in the last line
1340    pub last_line_chars: u32,
1341    /// How many UTF-16 code units are in the last line
1342    pub last_line_len_utf16: u32,
1343    /// The row idx of the longest row
1344    pub longest_row: u32,
1345    /// How many `char`s are in the longest row
1346    pub longest_row_chars: u32,
1347}
1348
1349impl TextSummary {
1350    pub fn lines_utf16(&self) -> PointUtf16 {
1351        PointUtf16 {
1352            row: self.lines.row,
1353            column: self.last_line_len_utf16,
1354        }
1355    }
1356
1357    pub fn newline() -> Self {
1358        Self {
1359            len: 1,
1360            chars: 1,
1361            len_utf16: OffsetUtf16(1),
1362            first_line_chars: 0,
1363            last_line_chars: 0,
1364            last_line_len_utf16: 0,
1365            lines: Point::new(1, 0),
1366            longest_row: 0,
1367            longest_row_chars: 0,
1368        }
1369    }
1370
1371    pub fn add_newline(&mut self) {
1372        self.len += 1;
1373        self.len_utf16 += OffsetUtf16(self.len_utf16.0 + 1);
1374        self.last_line_chars = 0;
1375        self.last_line_len_utf16 = 0;
1376        self.lines += Point::new(1, 0);
1377    }
1378}
1379
1380impl<'a> From<&'a str> for TextSummary {
1381    fn from(text: &'a str) -> Self {
1382        let mut len_utf16 = OffsetUtf16(0);
1383        let mut lines = Point::new(0, 0);
1384        let mut first_line_chars = 0;
1385        let mut last_line_chars = 0;
1386        let mut last_line_len_utf16 = 0;
1387        let mut longest_row = 0;
1388        let mut longest_row_chars = 0;
1389        let mut chars = 0;
1390        for c in text.chars() {
1391            chars += 1;
1392            len_utf16.0 += c.len_utf16();
1393
1394            if c == '\n' {
1395                lines += Point::new(1, 0);
1396                last_line_len_utf16 = 0;
1397                last_line_chars = 0;
1398            } else {
1399                lines.column += c.len_utf8() as u32;
1400                last_line_len_utf16 += c.len_utf16() as u32;
1401                last_line_chars += 1;
1402            }
1403
1404            if lines.row == 0 {
1405                first_line_chars = last_line_chars;
1406            }
1407
1408            if last_line_chars > longest_row_chars {
1409                longest_row = lines.row;
1410                longest_row_chars = last_line_chars;
1411            }
1412        }
1413
1414        TextSummary {
1415            len: text.len(),
1416            chars,
1417            len_utf16,
1418            lines,
1419            first_line_chars,
1420            last_line_chars,
1421            last_line_len_utf16,
1422            longest_row,
1423            longest_row_chars,
1424        }
1425    }
1426}
1427
1428impl sum_tree::ContextLessSummary for TextSummary {
1429    fn zero() -> Self {
1430        Default::default()
1431    }
1432
1433    fn add_summary(&mut self, summary: &Self) {
1434        *self += summary;
1435    }
1436}
1437
1438impl ops::Add<Self> for TextSummary {
1439    type Output = Self;
1440
1441    fn add(mut self, rhs: Self) -> Self::Output {
1442        AddAssign::add_assign(&mut self, &rhs);
1443        self
1444    }
1445}
1446
1447impl<'a> ops::AddAssign<&'a Self> for TextSummary {
1448    fn add_assign(&mut self, other: &'a Self) {
1449        let joined_chars = self.last_line_chars + other.first_line_chars;
1450        if joined_chars > self.longest_row_chars {
1451            self.longest_row = self.lines.row;
1452            self.longest_row_chars = joined_chars;
1453        }
1454        if other.longest_row_chars > self.longest_row_chars {
1455            self.longest_row = self.lines.row + other.longest_row;
1456            self.longest_row_chars = other.longest_row_chars;
1457        }
1458
1459        if self.lines.row == 0 {
1460            self.first_line_chars += other.first_line_chars;
1461        }
1462
1463        if other.lines.row == 0 {
1464            self.last_line_chars += other.first_line_chars;
1465            self.last_line_len_utf16 += other.last_line_len_utf16;
1466        } else {
1467            self.last_line_chars = other.last_line_chars;
1468            self.last_line_len_utf16 = other.last_line_len_utf16;
1469        }
1470
1471        self.chars += other.chars;
1472        self.len += other.len;
1473        self.len_utf16 += other.len_utf16;
1474        self.lines += other.lines;
1475    }
1476}
1477
1478impl ops::AddAssign<Self> for TextSummary {
1479    fn add_assign(&mut self, other: Self) {
1480        *self += &other;
1481    }
1482}
1483
1484pub trait TextDimension:
1485    'static + Clone + Copy + Default + for<'a> Dimension<'a, ChunkSummary> + std::fmt::Debug
1486{
1487    fn from_text_summary(summary: &TextSummary) -> Self;
1488    fn from_chunk(chunk: ChunkSlice) -> Self;
1489    fn add_assign(&mut self, other: &Self);
1490}
1491
1492impl<D1: TextDimension, D2: TextDimension> TextDimension for Dimensions<D1, D2, ()> {
1493    fn from_text_summary(summary: &TextSummary) -> Self {
1494        Dimensions(
1495            D1::from_text_summary(summary),
1496            D2::from_text_summary(summary),
1497            (),
1498        )
1499    }
1500
1501    fn from_chunk(chunk: ChunkSlice) -> Self {
1502        Dimensions(D1::from_chunk(chunk), D2::from_chunk(chunk), ())
1503    }
1504
1505    fn add_assign(&mut self, other: &Self) {
1506        self.0.add_assign(&other.0);
1507        self.1.add_assign(&other.1);
1508    }
1509}
1510
1511impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1512    fn zero(_cx: ()) -> Self {
1513        Default::default()
1514    }
1515
1516    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1517        *self += &summary.text;
1518    }
1519}
1520
1521impl TextDimension for TextSummary {
1522    fn from_text_summary(summary: &TextSummary) -> Self {
1523        *summary
1524    }
1525
1526    fn from_chunk(chunk: ChunkSlice) -> Self {
1527        chunk.text_summary()
1528    }
1529
1530    fn add_assign(&mut self, other: &Self) {
1531        *self += other;
1532    }
1533}
1534
1535impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1536    fn zero(_cx: ()) -> Self {
1537        Default::default()
1538    }
1539
1540    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1541        *self += summary.text.len;
1542    }
1543}
1544
1545impl TextDimension for usize {
1546    fn from_text_summary(summary: &TextSummary) -> Self {
1547        summary.len
1548    }
1549
1550    fn from_chunk(chunk: ChunkSlice) -> Self {
1551        chunk.len()
1552    }
1553
1554    fn add_assign(&mut self, other: &Self) {
1555        *self += other;
1556    }
1557}
1558
1559impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1560    fn zero(_cx: ()) -> Self {
1561        Default::default()
1562    }
1563
1564    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1565        *self += summary.text.len_utf16;
1566    }
1567}
1568
1569impl TextDimension for OffsetUtf16 {
1570    fn from_text_summary(summary: &TextSummary) -> Self {
1571        summary.len_utf16
1572    }
1573
1574    fn from_chunk(chunk: ChunkSlice) -> Self {
1575        chunk.len_utf16()
1576    }
1577
1578    fn add_assign(&mut self, other: &Self) {
1579        *self += other;
1580    }
1581}
1582
1583impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1584    fn zero(_cx: ()) -> Self {
1585        Default::default()
1586    }
1587
1588    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1589        *self += summary.text.lines;
1590    }
1591}
1592
1593impl TextDimension for Point {
1594    fn from_text_summary(summary: &TextSummary) -> Self {
1595        summary.lines
1596    }
1597
1598    fn from_chunk(chunk: ChunkSlice) -> Self {
1599        chunk.lines()
1600    }
1601
1602    fn add_assign(&mut self, other: &Self) {
1603        *self += other;
1604    }
1605}
1606
1607impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1608    fn zero(_cx: ()) -> Self {
1609        Default::default()
1610    }
1611
1612    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1613        *self += summary.text.lines_utf16();
1614    }
1615}
1616
1617impl TextDimension for PointUtf16 {
1618    fn from_text_summary(summary: &TextSummary) -> Self {
1619        summary.lines_utf16()
1620    }
1621
1622    fn from_chunk(chunk: ChunkSlice) -> Self {
1623        PointUtf16 {
1624            row: chunk.lines().row,
1625            column: chunk.last_line_len_utf16(),
1626        }
1627    }
1628
1629    fn add_assign(&mut self, other: &Self) {
1630        *self += other;
1631    }
1632}
1633
1634/// A pair of text dimensions in which only the first dimension is used for comparison,
1635/// but both dimensions are updated during addition and subtraction.
1636#[derive(Clone, Copy, Debug)]
1637pub struct DimensionPair<K, V> {
1638    pub key: K,
1639    pub value: Option<V>,
1640}
1641
1642impl<K: Default, V: Default> Default for DimensionPair<K, V> {
1643    fn default() -> Self {
1644        Self {
1645            key: Default::default(),
1646            value: Some(Default::default()),
1647        }
1648    }
1649}
1650
1651impl<K, V> cmp::Ord for DimensionPair<K, V>
1652where
1653    K: cmp::Ord,
1654{
1655    fn cmp(&self, other: &Self) -> cmp::Ordering {
1656        self.key.cmp(&other.key)
1657    }
1658}
1659
1660impl<K, V> cmp::PartialOrd for DimensionPair<K, V>
1661where
1662    K: cmp::PartialOrd,
1663{
1664    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1665        self.key.partial_cmp(&other.key)
1666    }
1667}
1668
1669impl<K, V> cmp::PartialEq for DimensionPair<K, V>
1670where
1671    K: cmp::PartialEq,
1672{
1673    fn eq(&self, other: &Self) -> bool {
1674        self.key.eq(&other.key)
1675    }
1676}
1677
1678impl<K, V> ops::Sub for DimensionPair<K, V>
1679where
1680    K: ops::Sub<K, Output = K>,
1681    V: ops::Sub<V, Output = V>,
1682{
1683    type Output = Self;
1684
1685    fn sub(self, rhs: Self) -> Self::Output {
1686        Self {
1687            key: self.key - rhs.key,
1688            value: self.value.zip(rhs.value).map(|(a, b)| a - b),
1689        }
1690    }
1691}
1692
1693impl<K, V> cmp::Eq for DimensionPair<K, V> where K: cmp::Eq {}
1694
1695impl<'a, K, V> sum_tree::Dimension<'a, ChunkSummary> for DimensionPair<K, V>
1696where
1697    K: sum_tree::Dimension<'a, ChunkSummary>,
1698    V: sum_tree::Dimension<'a, ChunkSummary>,
1699{
1700    fn zero(_cx: ()) -> Self {
1701        Self {
1702            key: K::zero(_cx),
1703            value: Some(V::zero(_cx)),
1704        }
1705    }
1706
1707    fn add_summary(&mut self, summary: &'a ChunkSummary, _cx: ()) {
1708        self.key.add_summary(summary, _cx);
1709        if let Some(value) = &mut self.value {
1710            value.add_summary(summary, _cx);
1711        }
1712    }
1713}
1714
1715impl<K, V> TextDimension for DimensionPair<K, V>
1716where
1717    K: TextDimension,
1718    V: TextDimension,
1719{
1720    fn add_assign(&mut self, other: &Self) {
1721        self.key.add_assign(&other.key);
1722        if let Some(value) = &mut self.value {
1723            if let Some(other_value) = other.value.as_ref() {
1724                value.add_assign(other_value);
1725            } else {
1726                self.value.take();
1727            }
1728        }
1729    }
1730
1731    fn from_chunk(chunk: ChunkSlice) -> Self {
1732        Self {
1733            key: K::from_chunk(chunk),
1734            value: Some(V::from_chunk(chunk)),
1735        }
1736    }
1737
1738    fn from_text_summary(summary: &TextSummary) -> Self {
1739        Self {
1740            key: K::from_text_summary(summary),
1741            value: Some(V::from_text_summary(summary)),
1742        }
1743    }
1744}
1745
1746#[cfg(test)]
1747mod tests {
1748    use super::*;
1749    use Bias::{Left, Right};
1750    use rand::prelude::*;
1751    use std::{cmp::Ordering, env, io::Read};
1752    use util::RandomCharIter;
1753
1754    #[ctor::ctor]
1755    fn init_logger() {
1756        zlog::init_test();
1757    }
1758
1759    #[test]
1760    fn test_all_4_byte_chars() {
1761        let mut rope = Rope::new();
1762        let text = "🏀".repeat(256);
1763        rope.push(&text);
1764        assert_eq!(rope.text(), text);
1765    }
1766
1767    #[test]
1768    fn test_clip() {
1769        let rope = Rope::from("🧘");
1770
1771        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1772        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1773        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1774
1775        assert_eq!(
1776            rope.clip_point(Point::new(0, 1), Bias::Left),
1777            Point::new(0, 0)
1778        );
1779        assert_eq!(
1780            rope.clip_point(Point::new(0, 1), Bias::Right),
1781            Point::new(0, 4)
1782        );
1783        assert_eq!(
1784            rope.clip_point(Point::new(0, 5), Bias::Right),
1785            Point::new(0, 4)
1786        );
1787
1788        assert_eq!(
1789            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1790            PointUtf16::new(0, 0)
1791        );
1792        assert_eq!(
1793            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1794            PointUtf16::new(0, 2)
1795        );
1796        assert_eq!(
1797            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1798            PointUtf16::new(0, 2)
1799        );
1800
1801        assert_eq!(
1802            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1803            OffsetUtf16(0)
1804        );
1805        assert_eq!(
1806            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1807            OffsetUtf16(2)
1808        );
1809        assert_eq!(
1810            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1811            OffsetUtf16(2)
1812        );
1813    }
1814
1815    #[test]
1816    fn test_prev_next_line() {
1817        let rope = Rope::from("abc\ndef\nghi\njkl");
1818
1819        let mut chunks = rope.chunks();
1820        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1821
1822        assert!(chunks.next_line());
1823        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1824
1825        assert!(chunks.next_line());
1826        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1827
1828        assert!(chunks.next_line());
1829        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1830
1831        assert!(!chunks.next_line());
1832        assert_eq!(chunks.peek(), None);
1833
1834        assert!(chunks.prev_line());
1835        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1836
1837        assert!(chunks.prev_line());
1838        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1839
1840        assert!(chunks.prev_line());
1841        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1842
1843        assert!(chunks.prev_line());
1844        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1845
1846        assert!(!chunks.prev_line());
1847        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1848
1849        // Only return true when the cursor has moved to the start of a line
1850        let mut chunks = rope.chunks_in_range(5..7);
1851        chunks.seek(6);
1852        assert!(!chunks.prev_line());
1853        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
1854
1855        assert!(!chunks.next_line());
1856        assert_eq!(chunks.peek(), None);
1857    }
1858
1859    #[test]
1860    fn test_lines() {
1861        let rope = Rope::from("abc\ndefg\nhi");
1862        let mut lines = rope.chunks().lines();
1863        assert_eq!(lines.next(), Some("abc"));
1864        assert_eq!(lines.next(), Some("defg"));
1865        assert_eq!(lines.next(), Some("hi"));
1866        assert_eq!(lines.next(), None);
1867
1868        let rope = Rope::from("abc\ndefg\nhi\n");
1869        let mut lines = rope.chunks().lines();
1870        assert_eq!(lines.next(), Some("abc"));
1871        assert_eq!(lines.next(), Some("defg"));
1872        assert_eq!(lines.next(), Some("hi"));
1873        assert_eq!(lines.next(), Some(""));
1874        assert_eq!(lines.next(), None);
1875
1876        let rope = Rope::from("abc\ndefg\nhi");
1877        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1878        assert_eq!(lines.next(), Some("hi"));
1879        assert_eq!(lines.next(), Some("defg"));
1880        assert_eq!(lines.next(), Some("abc"));
1881        assert_eq!(lines.next(), None);
1882
1883        let rope = Rope::from("abc\ndefg\nhi\n");
1884        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1885        assert_eq!(lines.next(), Some(""));
1886        assert_eq!(lines.next(), Some("hi"));
1887        assert_eq!(lines.next(), Some("defg"));
1888        assert_eq!(lines.next(), Some("abc"));
1889        assert_eq!(lines.next(), None);
1890
1891        let rope = Rope::from("abc\nlonger line test\nhi");
1892        let mut lines = rope.chunks().lines();
1893        assert_eq!(lines.next(), Some("abc"));
1894        assert_eq!(lines.next(), Some("longer line test"));
1895        assert_eq!(lines.next(), Some("hi"));
1896        assert_eq!(lines.next(), None);
1897
1898        let rope = Rope::from("abc\nlonger line test\nhi");
1899        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1900        assert_eq!(lines.next(), Some("hi"));
1901        assert_eq!(lines.next(), Some("longer line test"));
1902        assert_eq!(lines.next(), Some("abc"));
1903        assert_eq!(lines.next(), None);
1904    }
1905
1906    #[gpui::test(iterations = 100)]
1907    fn test_random_rope(mut rng: StdRng) {
1908        let operations = env::var("OPERATIONS")
1909            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1910            .unwrap_or(10);
1911
1912        let mut expected = String::new();
1913        let mut actual = Rope::new();
1914        for _ in 0..operations {
1915            let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1916            let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1917            let len = rng.random_range(0..=64);
1918            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1919
1920            let mut new_actual = Rope::new();
1921            let mut cursor = actual.cursor(0);
1922            new_actual.append(cursor.slice(start_ix));
1923            new_actual.push(&new_text);
1924            cursor.seek_forward(end_ix);
1925            new_actual.append(cursor.suffix());
1926            actual = new_actual;
1927
1928            expected.replace_range(start_ix..end_ix, &new_text);
1929
1930            assert_eq!(actual.text(), expected);
1931            log::info!("text: {:?}", expected);
1932
1933            for _ in 0..5 {
1934                let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1935                let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1936
1937                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1938                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1939
1940                let mut actual_text = String::new();
1941                actual
1942                    .bytes_in_range(start_ix..end_ix)
1943                    .read_to_string(&mut actual_text)
1944                    .unwrap();
1945                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1946
1947                assert_eq!(
1948                    actual
1949                        .reversed_chunks_in_range(start_ix..end_ix)
1950                        .collect::<Vec<&str>>()
1951                        .into_iter()
1952                        .rev()
1953                        .collect::<String>(),
1954                    &expected[start_ix..end_ix]
1955                );
1956
1957                let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
1958                    .match_indices('\n')
1959                    .map(|(index, _)| start_ix + index + 1)
1960                    .collect();
1961
1962                let mut chunks = actual.chunks_in_range(start_ix..end_ix);
1963
1964                let mut actual_line_starts = Vec::new();
1965                while chunks.next_line() {
1966                    actual_line_starts.push(chunks.offset());
1967                }
1968                assert_eq!(
1969                    actual_line_starts,
1970                    expected_line_starts,
1971                    "actual line starts != expected line starts when using next_line() for {:?} ({:?})",
1972                    &expected[start_ix..end_ix],
1973                    start_ix..end_ix
1974                );
1975
1976                if start_ix < end_ix
1977                    && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
1978                {
1979                    expected_line_starts.insert(0, start_ix);
1980                }
1981                // Remove the last index if it starts at the end of the range.
1982                if expected_line_starts.last() == Some(&end_ix) {
1983                    expected_line_starts.pop();
1984                }
1985
1986                let mut actual_line_starts = Vec::new();
1987                while chunks.prev_line() {
1988                    actual_line_starts.push(chunks.offset());
1989                }
1990                actual_line_starts.reverse();
1991                assert_eq!(
1992                    actual_line_starts,
1993                    expected_line_starts,
1994                    "actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
1995                    &expected[start_ix..end_ix],
1996                    start_ix..end_ix
1997                );
1998
1999                // Check that next_line/prev_line work correctly from random positions
2000                let mut offset = rng.random_range(start_ix..=end_ix);
2001                while !expected.is_char_boundary(offset) {
2002                    offset -= 1;
2003                }
2004                chunks.seek(offset);
2005
2006                for _ in 0..5 {
2007                    if rng.random() {
2008                        let expected_next_line_start = expected[offset..end_ix]
2009                            .find('\n')
2010                            .map(|newline_ix| offset + newline_ix + 1);
2011
2012                        let moved = chunks.next_line();
2013                        assert_eq!(
2014                            moved,
2015                            expected_next_line_start.is_some(),
2016                            "unexpected result from next_line after seeking to {} in range {:?} ({:?})",
2017                            offset,
2018                            start_ix..end_ix,
2019                            &expected[start_ix..end_ix]
2020                        );
2021                        if let Some(expected_next_line_start) = expected_next_line_start {
2022                            assert_eq!(
2023                                chunks.offset(),
2024                                expected_next_line_start,
2025                                "invalid position after seeking to {} in range {:?} ({:?})",
2026                                offset,
2027                                start_ix..end_ix,
2028                                &expected[start_ix..end_ix]
2029                            );
2030                        } else {
2031                            assert_eq!(
2032                                chunks.offset(),
2033                                end_ix,
2034                                "invalid position after seeking to {} in range {:?} ({:?})",
2035                                offset,
2036                                start_ix..end_ix,
2037                                &expected[start_ix..end_ix]
2038                            );
2039                        }
2040                    } else {
2041                        let search_end = if offset > 0 && expected.as_bytes()[offset - 1] == b'\n' {
2042                            offset - 1
2043                        } else {
2044                            offset
2045                        };
2046
2047                        let expected_prev_line_start = expected[..search_end]
2048                            .rfind('\n')
2049                            .and_then(|newline_ix| {
2050                                let line_start_ix = newline_ix + 1;
2051                                if line_start_ix >= start_ix {
2052                                    Some(line_start_ix)
2053                                } else {
2054                                    None
2055                                }
2056                            })
2057                            .or({
2058                                if offset > 0 && start_ix == 0 {
2059                                    Some(0)
2060                                } else {
2061                                    None
2062                                }
2063                            });
2064
2065                        let moved = chunks.prev_line();
2066                        assert_eq!(
2067                            moved,
2068                            expected_prev_line_start.is_some(),
2069                            "unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
2070                            offset,
2071                            start_ix..end_ix,
2072                            &expected[start_ix..end_ix]
2073                        );
2074                        if let Some(expected_prev_line_start) = expected_prev_line_start {
2075                            assert_eq!(
2076                                chunks.offset(),
2077                                expected_prev_line_start,
2078                                "invalid position after seeking to {} in range {:?} ({:?})",
2079                                offset,
2080                                start_ix..end_ix,
2081                                &expected[start_ix..end_ix]
2082                            );
2083                        } else {
2084                            assert_eq!(
2085                                chunks.offset(),
2086                                start_ix,
2087                                "invalid position after seeking to {} in range {:?} ({:?})",
2088                                offset,
2089                                start_ix..end_ix,
2090                                &expected[start_ix..end_ix]
2091                            );
2092                        }
2093                    }
2094
2095                    assert!((start_ix..=end_ix).contains(&chunks.offset()));
2096                    if rng.random() {
2097                        offset = rng.random_range(start_ix..=end_ix);
2098                        while !expected.is_char_boundary(offset) {
2099                            offset -= 1;
2100                        }
2101                        chunks.seek(offset);
2102                    } else {
2103                        chunks.next();
2104                        offset = chunks.offset();
2105                        assert!((start_ix..=end_ix).contains(&chunks.offset()));
2106                    }
2107                }
2108            }
2109
2110            let mut offset_utf16 = OffsetUtf16(0);
2111            let mut point = Point::new(0, 0);
2112            let mut point_utf16 = PointUtf16::new(0, 0);
2113            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
2114                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
2115                assert_eq!(
2116                    actual.offset_to_point_utf16(ix),
2117                    point_utf16,
2118                    "offset_to_point_utf16({})",
2119                    ix
2120                );
2121                assert_eq!(
2122                    actual.point_to_offset(point),
2123                    ix,
2124                    "point_to_offset({:?})",
2125                    point
2126                );
2127                assert_eq!(
2128                    actual.point_utf16_to_offset(point_utf16),
2129                    ix,
2130                    "point_utf16_to_offset({:?})",
2131                    point_utf16
2132                );
2133                assert_eq!(
2134                    actual.offset_to_offset_utf16(ix),
2135                    offset_utf16,
2136                    "offset_to_offset_utf16({:?})",
2137                    ix
2138                );
2139                assert_eq!(
2140                    actual.offset_utf16_to_offset(offset_utf16),
2141                    ix,
2142                    "offset_utf16_to_offset({:?})",
2143                    offset_utf16
2144                );
2145                if ch == '\n' {
2146                    point += Point::new(1, 0);
2147                    point_utf16 += PointUtf16::new(1, 0);
2148                } else {
2149                    point.column += ch.len_utf8() as u32;
2150                    point_utf16.column += ch.len_utf16() as u32;
2151                }
2152                offset_utf16.0 += ch.len_utf16();
2153            }
2154
2155            let mut offset_utf16 = OffsetUtf16(0);
2156            let mut point_utf16 = Unclipped(PointUtf16::zero());
2157            for unit in expected.encode_utf16() {
2158                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
2159                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
2160                assert!(right_offset >= left_offset);
2161                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
2162                actual.offset_utf16_to_offset(left_offset);
2163                actual.offset_utf16_to_offset(right_offset);
2164
2165                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
2166                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
2167                assert!(right_point >= left_point);
2168                // Ensure translating valid UTF-16 points to offsets doesn't panic.
2169                actual.point_utf16_to_offset(left_point);
2170                actual.point_utf16_to_offset(right_point);
2171
2172                offset_utf16.0 += 1;
2173                if unit == b'\n' as u16 {
2174                    point_utf16.0 += PointUtf16::new(1, 0);
2175                } else {
2176                    point_utf16.0 += PointUtf16::new(0, 1);
2177                }
2178            }
2179
2180            for _ in 0..5 {
2181                let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
2182                let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
2183                assert_eq!(
2184                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
2185                    TextSummary::from(&expected[start_ix..end_ix])
2186                );
2187            }
2188
2189            let mut expected_longest_rows = Vec::new();
2190            let mut longest_line_len = -1_isize;
2191            for (row, line) in expected.split('\n').enumerate() {
2192                let row = row as u32;
2193                assert_eq!(
2194                    actual.line_len(row),
2195                    line.len() as u32,
2196                    "invalid line len for row {}",
2197                    row
2198                );
2199
2200                let line_char_count = line.chars().count() as isize;
2201                match line_char_count.cmp(&longest_line_len) {
2202                    Ordering::Less => {}
2203                    Ordering::Equal => expected_longest_rows.push(row),
2204                    Ordering::Greater => {
2205                        longest_line_len = line_char_count;
2206                        expected_longest_rows.clear();
2207                        expected_longest_rows.push(row);
2208                    }
2209                }
2210            }
2211
2212            let longest_row = actual.summary().longest_row;
2213            assert!(
2214                expected_longest_rows.contains(&longest_row),
2215                "incorrect longest row {}. expected {:?} with length {}",
2216                longest_row,
2217                expected_longest_rows,
2218                longest_line_len,
2219            );
2220        }
2221    }
2222
2223    #[test]
2224    fn test_chunks_equals_str() {
2225        let text = "This is a multi-chunk\n& multi-line test string!";
2226        let rope = Rope::from(text);
2227        for start in 0..text.len() {
2228            for end in start..text.len() {
2229                let range = start..end;
2230                let correct_substring = &text[start..end];
2231
2232                // Test that correct range returns true
2233                assert!(
2234                    rope.chunks_in_range(range.clone())
2235                        .equals_str(correct_substring)
2236                );
2237                assert!(
2238                    rope.reversed_chunks_in_range(range.clone())
2239                        .equals_str(correct_substring)
2240                );
2241
2242                // Test that all other ranges return false (unless they happen to match)
2243                for other_start in 0..text.len() {
2244                    for other_end in other_start..text.len() {
2245                        if other_start == start && other_end == end {
2246                            continue;
2247                        }
2248                        let other_substring = &text[other_start..other_end];
2249
2250                        // Only assert false if the substrings are actually different
2251                        if other_substring == correct_substring {
2252                            continue;
2253                        }
2254                        assert!(
2255                            !rope
2256                                .chunks_in_range(range.clone())
2257                                .equals_str(other_substring)
2258                        );
2259                        assert!(
2260                            !rope
2261                                .reversed_chunks_in_range(range.clone())
2262                                .equals_str(other_substring)
2263                        );
2264                    }
2265                }
2266            }
2267        }
2268
2269        let rope = Rope::from("");
2270        assert!(rope.chunks_in_range(0..0).equals_str(""));
2271        assert!(rope.reversed_chunks_in_range(0..0).equals_str(""));
2272        assert!(!rope.chunks_in_range(0..0).equals_str("foo"));
2273        assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo"));
2274    }
2275
2276    #[test]
2277    fn test_is_char_boundary() {
2278        let fixture = "";
2279        let rope = Rope::from("");
2280        for b in 0..=fixture.len() {
2281            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2282        }
2283        let fixture = "";
2284        let rope = Rope::from("");
2285        for b in 0..=fixture.len() {
2286            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2287        }
2288        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2289        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2290        for b in 0..=fixture.len() {
2291            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2292        }
2293    }
2294
2295    #[test]
2296    fn test_floor_char_boundary() {
2297        // polyfill of str::floor_char_boundary
2298        fn floor_char_boundary(str: &str, index: usize) -> usize {
2299            if index >= str.len() {
2300                str.len()
2301            } else {
2302                let lower_bound = index.saturating_sub(3);
2303                let new_index = str.as_bytes()[lower_bound..=index]
2304                    .iter()
2305                    .rposition(|b| (*b as i8) >= -0x40);
2306
2307                lower_bound + new_index.unwrap()
2308            }
2309        }
2310
2311        let fixture = "";
2312        let rope = Rope::from("");
2313        for b in 0..=fixture.len() {
2314            assert_eq!(
2315                rope.floor_char_boundary(b),
2316                floor_char_boundary(&fixture, b)
2317            );
2318        }
2319
2320        let fixture = "";
2321        let rope = Rope::from("");
2322        for b in 0..=fixture.len() {
2323            assert_eq!(
2324                rope.floor_char_boundary(b),
2325                floor_char_boundary(&fixture, b)
2326            );
2327        }
2328
2329        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2330        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2331        for b in 0..=fixture.len() {
2332            assert_eq!(
2333                rope.floor_char_boundary(b),
2334                floor_char_boundary(&fixture, b)
2335            );
2336        }
2337    }
2338
2339    #[test]
2340    fn test_ceil_char_boundary() {
2341        // polyfill of str::ceil_char_boundary
2342        fn ceil_char_boundary(str: &str, index: usize) -> usize {
2343            if index > str.len() {
2344                str.len()
2345            } else {
2346                let upper_bound = Ord::min(index + 4, str.len());
2347                str.as_bytes()[index..upper_bound]
2348                    .iter()
2349                    .position(|b| (*b as i8) >= -0x40)
2350                    .map_or(upper_bound, |pos| pos + index)
2351            }
2352        }
2353
2354        let fixture = "";
2355        let rope = Rope::from("");
2356        for b in 0..=fixture.len() {
2357            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2358        }
2359
2360        let fixture = "";
2361        let rope = Rope::from("");
2362        for b in 0..=fixture.len() {
2363            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2364        }
2365
2366        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2367        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2368        for b in 0..=fixture.len() {
2369            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2370        }
2371    }
2372
2373    #[test]
2374    fn test_to_string_with_line_ending() {
2375        // Test Unix line endings (no conversion)
2376        let rope = Rope::from("line1\nline2\nline3");
2377        assert_eq!(
2378            rope.to_string_with_line_ending(LineEnding::Unix),
2379            "line1\nline2\nline3"
2380        );
2381
2382        // Test Windows line endings
2383        assert_eq!(
2384            rope.to_string_with_line_ending(LineEnding::Windows),
2385            "line1\r\nline2\r\nline3"
2386        );
2387
2388        // Test empty rope
2389        let empty_rope = Rope::from("");
2390        assert_eq!(
2391            empty_rope.to_string_with_line_ending(LineEnding::Windows),
2392            ""
2393        );
2394
2395        // Test single line (no newlines)
2396        let single_line = Rope::from("single line");
2397        assert_eq!(
2398            single_line.to_string_with_line_ending(LineEnding::Windows),
2399            "single line"
2400        );
2401
2402        // Test rope ending with newline
2403        let ending_newline = Rope::from("line1\nline2\n");
2404        assert_eq!(
2405            ending_newline.to_string_with_line_ending(LineEnding::Windows),
2406            "line1\r\nline2\r\n"
2407        );
2408
2409        // Test large rope with multiple chunks
2410        let mut large_rope = Rope::new();
2411        for i in 0..100 {
2412            large_rope.push(&format!("line{}\n", i));
2413        }
2414        let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
2415        assert!(result.contains("\r\n"));
2416        assert!(!result.contains("\n\n"));
2417        assert_eq!(result.matches("\r\n").count(), 100);
2418    }
2419
2420    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
2421        while !text.is_char_boundary(offset) {
2422            match bias {
2423                Bias::Left => offset -= 1,
2424                Bias::Right => offset += 1,
2425            }
2426        }
2427        offset
2428    }
2429
2430    impl Rope {
2431        fn text(&self) -> String {
2432            let mut text = String::new();
2433            for chunk in self.chunks.cursor::<()>(()) {
2434                text.push_str(&chunk.text);
2435            }
2436            text
2437        }
2438    }
2439}