rope.rs

   1mod chunk;
   2mod offset_utf16;
   3mod point;
   4mod point_utf16;
   5mod unclipped;
   6
   7use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
   8use regex::Regex;
   9use smallvec::SmallVec;
  10use std::{
  11    borrow::Cow,
  12    cmp, fmt, io, mem,
  13    ops::{self, AddAssign, Range},
  14    str,
  15    sync::{Arc, LazyLock},
  16};
  17use sum_tree::{Bias, Dimension, Dimensions, SumTree};
  18
  19pub use chunk::{Chunk, ChunkSlice};
  20pub use offset_utf16::OffsetUtf16;
  21pub use point::Point;
  22pub use point_utf16::PointUtf16;
  23pub use unclipped::Unclipped;
  24
  25use crate::chunk::Bitmap;
  26
  27static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
  28    LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
  29
  30#[derive(Clone, Copy, Debug, PartialEq)]
  31pub enum LineEnding {
  32    Unix,
  33    Windows,
  34}
  35
  36impl Default for LineEnding {
  37    fn default() -> Self {
  38        #[cfg(unix)]
  39        return Self::Unix;
  40
  41        #[cfg(not(unix))]
  42        return Self::Windows;
  43    }
  44}
  45
  46impl LineEnding {
  47    pub fn as_str(&self) -> &'static str {
  48        match self {
  49            LineEnding::Unix => "\n",
  50            LineEnding::Windows => "\r\n",
  51        }
  52    }
  53
  54    pub fn label(&self) -> &'static str {
  55        match self {
  56            LineEnding::Unix => "LF",
  57            LineEnding::Windows => "CRLF",
  58        }
  59    }
  60
  61    pub fn detect(text: &str) -> Self {
  62        let mut max_ix = cmp::min(text.len(), 1000);
  63        while !text.is_char_boundary(max_ix) {
  64            max_ix -= 1;
  65        }
  66
  67        if let Some(ix) = text[..max_ix].find(['\n']) {
  68            if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
  69                Self::Windows
  70            } else {
  71                Self::Unix
  72            }
  73        } else {
  74            Self::default()
  75        }
  76    }
  77
  78    pub fn normalize(text: &mut String) {
  79        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
  80            *text = replaced;
  81        }
  82    }
  83
  84    pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
  85        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
  86            replaced.into()
  87        } else {
  88            text
  89        }
  90    }
  91
  92    pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
  93        if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
  94            replaced.into()
  95        } else {
  96            text
  97        }
  98    }
  99
 100    /// Converts text chunks into a [`String`] using the current line ending.
 101    pub fn into_string(&self, chunks: Chunks<'_>) -> String {
 102        match self {
 103            LineEnding::Unix => chunks.collect(),
 104            LineEnding::Windows => {
 105                let line_ending = self.as_str();
 106                let mut result = String::new();
 107                for chunk in chunks {
 108                    result.push_str(&chunk.replace('\n', line_ending));
 109                }
 110                result
 111            }
 112        }
 113    }
 114}
 115
 116#[derive(Clone, Default)]
 117pub struct Rope {
 118    chunks: SumTree<Chunk>,
 119}
 120
 121impl Rope {
 122    pub fn new() -> Self {
 123        Self::default()
 124    }
 125
 126    /// Checks that `index`-th byte is the first byte in a UTF-8 code point
 127    /// sequence or the end of the string.
 128    ///
 129    /// The start and end of the string (when `index == self.len()`) are
 130    /// considered to be boundaries.
 131    ///
 132    /// Returns `false` if `index` is greater than `self.len()`.
 133    pub fn is_char_boundary(&self, offset: usize) -> bool {
 134        if self.chunks.is_empty() {
 135            return offset == 0;
 136        }
 137        let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
 138        let chunk_offset = offset - start;
 139        item.map(|chunk| chunk.is_char_boundary(chunk_offset))
 140            .unwrap_or(false)
 141    }
 142
 143    #[track_caller]
 144    #[inline(always)]
 145    pub fn assert_char_boundary(&self, offset: usize) {
 146        if self.chunks.is_empty() && offset == 0 {
 147            return;
 148        }
 149        let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
 150        match item {
 151            Some(chunk) => {
 152                let chunk_offset = offset - start;
 153                chunk.assert_char_boundary(chunk_offset);
 154            }
 155            None => {
 156                panic!(
 157                    "byte index {} is out of bounds of rope (length: {})",
 158                    offset,
 159                    self.len()
 160                );
 161            }
 162        }
 163    }
 164
 165    pub fn floor_char_boundary(&self, index: usize) -> usize {
 166        if index >= self.len() {
 167            self.len()
 168        } else {
 169            #[inline]
 170            pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 171                // This is bit magic equivalent to: b < 128 || b >= 192
 172                (u8 as i8) >= -0x40
 173            }
 174
 175            let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
 176            let chunk_offset = index - start;
 177            let lower_idx = item.map(|chunk| {
 178                let lower_bound = chunk_offset.saturating_sub(3);
 179                chunk
 180                    .text
 181                    .as_bytes()
 182                    .get(lower_bound..=chunk_offset)
 183                    .map(|it| {
 184                        let new_idx = it
 185                            .iter()
 186                            .rposition(|&b| is_utf8_char_boundary(b))
 187                            .unwrap_or(0);
 188                        lower_bound + new_idx
 189                    })
 190                    .unwrap_or(chunk.text.len())
 191            });
 192            lower_idx.map_or_else(|| self.len(), |idx| start + idx)
 193        }
 194    }
 195
 196    pub fn ceil_char_boundary(&self, index: usize) -> usize {
 197        if index > self.len() {
 198            self.len()
 199        } else {
 200            #[inline]
 201            pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 202                // This is bit magic equivalent to: b < 128 || b >= 192
 203                (u8 as i8) >= -0x40
 204            }
 205
 206            let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
 207            let chunk_offset = index - start;
 208            let upper_idx = item.map(|chunk| {
 209                let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
 210                chunk.text.as_bytes()[chunk_offset..upper_bound]
 211                    .iter()
 212                    .position(|&b| is_utf8_char_boundary(b))
 213                    .map_or(upper_bound, |pos| pos + chunk_offset)
 214            });
 215
 216            upper_idx.map_or_else(|| self.len(), |idx| start + idx)
 217        }
 218    }
 219
 220    pub fn append(&mut self, rope: Rope) {
 221        if let Some(chunk) = rope.chunks.first()
 222            && (self
 223                .chunks
 224                .last()
 225                .is_some_and(|c| c.text.len() < chunk::MIN_BASE)
 226                || chunk.text.len() < chunk::MIN_BASE)
 227        {
 228            self.push_chunk(chunk.as_slice());
 229
 230            let mut chunks = rope.chunks.cursor::<()>(());
 231            chunks.next();
 232            chunks.next();
 233            self.chunks.append(chunks.suffix(), ());
 234        } else {
 235            self.chunks.append(rope.chunks, ());
 236        }
 237        self.check_invariants();
 238    }
 239
 240    pub fn replace(&mut self, range: Range<usize>, text: &str) {
 241        let mut new_rope = Rope::new();
 242        let mut cursor = self.cursor(0);
 243        new_rope.append(cursor.slice(range.start));
 244        cursor.seek_forward(range.end);
 245        new_rope.push(text);
 246        new_rope.append(cursor.suffix());
 247        *self = new_rope;
 248    }
 249
 250    pub fn slice(&self, range: Range<usize>) -> Rope {
 251        let mut cursor = self.cursor(0);
 252        cursor.seek_forward(range.start);
 253        cursor.slice(range.end)
 254    }
 255
 256    pub fn slice_rows(&self, range: Range<u32>) -> Rope {
 257        // This would be more efficient with a forward advance after the first, but it's fine.
 258        let start = self.point_to_offset(Point::new(range.start, 0));
 259        let end = self.point_to_offset(Point::new(range.end, 0));
 260        self.slice(start..end)
 261    }
 262
 263    pub fn push(&mut self, mut text: &str) {
 264        self.chunks.update_last(
 265            |last_chunk| {
 266                let split_ix = if last_chunk.text.len() + text.len() <= chunk::MAX_BASE {
 267                    text.len()
 268                } else {
 269                    let mut split_ix = cmp::min(
 270                        chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
 271                        text.len(),
 272                    );
 273                    while !text.is_char_boundary(split_ix) {
 274                        split_ix += 1;
 275                    }
 276                    split_ix
 277                };
 278
 279                let (suffix, remainder) = text.split_at(split_ix);
 280                last_chunk.push_str(suffix);
 281                text = remainder;
 282            },
 283            (),
 284        );
 285
 286        if text.len() > 2048 {
 287            return self.push_large(text);
 288        }
 289        let mut new_chunks = SmallVec::<[_; 16]>::new();
 290
 291        while !text.is_empty() {
 292            let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
 293            while !text.is_char_boundary(split_ix) {
 294                split_ix -= 1;
 295            }
 296            let (chunk, remainder) = text.split_at(split_ix);
 297            new_chunks.push(chunk);
 298            text = remainder;
 299        }
 300
 301        #[cfg(test)]
 302        const PARALLEL_THRESHOLD: usize = 4;
 303        #[cfg(not(test))]
 304        const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
 305
 306        if new_chunks.len() >= PARALLEL_THRESHOLD {
 307            self.chunks
 308                .par_extend(new_chunks.into_vec().into_par_iter().map(Chunk::new), ());
 309        } else {
 310            self.chunks
 311                .extend(new_chunks.into_iter().map(Chunk::new), ());
 312        }
 313
 314        self.check_invariants();
 315    }
 316
 317    /// A copy of `push` specialized for working with large quantities of text.
 318    fn push_large(&mut self, mut text: &str) {
 319        // To avoid frequent reallocs when loading large swaths of file contents,
 320        // we estimate worst-case `new_chunks` capacity;
 321        // Chunk is a fixed-capacity buffer. If a character falls on
 322        // chunk boundary, we push it off to the following chunk (thus leaving a small bit of capacity unfilled in current chunk).
 323        // Worst-case chunk count when loading a file is then a case where every chunk ends up with that unused capacity.
 324        // Since we're working with UTF-8, each character is at most 4 bytes wide. It follows then that the worst case is where
 325        // a chunk ends with 3 bytes of a 4-byte character. These 3 bytes end up being stored in the following chunk, thus wasting
 326        // 3 bytes of storage in current chunk.
 327        // For example, a 1024-byte string can occupy between 32 (full ASCII, 1024/32) and 36 (full 4-byte UTF-8, 1024 / 29 rounded up) chunks.
 328        const MIN_CHUNK_SIZE: usize = chunk::MAX_BASE - 3;
 329
 330        // We also round up the capacity up by one, for a good measure; we *really* don't want to realloc here, as we assume that the # of characters
 331        // we're working with there is large.
 332        let capacity = text.len().div_ceil(MIN_CHUNK_SIZE);
 333        let mut new_chunks = Vec::with_capacity(capacity);
 334
 335        while !text.is_empty() {
 336            let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
 337            while !text.is_char_boundary(split_ix) {
 338                split_ix -= 1;
 339            }
 340            let (chunk, remainder) = text.split_at(split_ix);
 341            new_chunks.push(chunk);
 342            text = remainder;
 343        }
 344
 345        #[cfg(test)]
 346        const PARALLEL_THRESHOLD: usize = 4;
 347        #[cfg(not(test))]
 348        const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
 349
 350        if new_chunks.len() >= PARALLEL_THRESHOLD {
 351            self.chunks
 352                .par_extend(new_chunks.into_par_iter().map(Chunk::new), ());
 353        } else {
 354            self.chunks
 355                .extend(new_chunks.into_iter().map(Chunk::new), ());
 356        }
 357
 358        self.check_invariants();
 359    }
 360
 361    fn push_chunk(&mut self, mut chunk: ChunkSlice) {
 362        self.chunks.update_last(
 363            |last_chunk| {
 364                let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
 365                    chunk.len()
 366                } else {
 367                    let mut split_ix = cmp::min(
 368                        chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
 369                        chunk.len(),
 370                    );
 371                    while !chunk.is_char_boundary(split_ix) {
 372                        split_ix += 1;
 373                    }
 374                    split_ix
 375                };
 376
 377                let (suffix, remainder) = chunk.split_at(split_ix);
 378                last_chunk.append(suffix);
 379                chunk = remainder;
 380            },
 381            (),
 382        );
 383
 384        if !chunk.is_empty() {
 385            self.chunks.push(chunk.into(), ());
 386        }
 387    }
 388
 389    pub fn push_front(&mut self, text: &str) {
 390        let suffix = mem::replace(self, Rope::from(text));
 391        self.append(suffix);
 392    }
 393
 394    fn check_invariants(&self) {
 395        #[cfg(test)]
 396        {
 397            // Ensure all chunks except maybe the last one are not underflowing.
 398            // Allow some wiggle room for multibyte characters at chunk boundaries.
 399            let mut chunks = self.chunks.cursor::<()>(()).peekable();
 400            while let Some(chunk) = chunks.next() {
 401                if chunks.peek().is_some() {
 402                    assert!(chunk.text.len() + 3 >= chunk::MIN_BASE);
 403                }
 404            }
 405        }
 406    }
 407
 408    pub fn summary(&self) -> TextSummary {
 409        self.chunks.summary().text
 410    }
 411
 412    pub fn len(&self) -> usize {
 413        self.chunks.extent(())
 414    }
 415
 416    pub fn is_empty(&self) -> bool {
 417        self.len() == 0
 418    }
 419
 420    pub fn max_point(&self) -> Point {
 421        self.chunks.extent(())
 422    }
 423
 424    pub fn max_point_utf16(&self) -> PointUtf16 {
 425        self.chunks.extent(())
 426    }
 427
 428    pub fn cursor(&self, offset: usize) -> Cursor<'_> {
 429        Cursor::new(self, offset)
 430    }
 431
 432    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
 433        self.chars_at(0)
 434    }
 435
 436    pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 437        self.chunks_in_range(start..self.len()).flat_map(str::chars)
 438    }
 439
 440    pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
 441        self.reversed_chunks_in_range(0..start)
 442            .flat_map(|chunk| chunk.chars().rev())
 443    }
 444
 445    pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
 446        Bytes::new(self, range, false)
 447    }
 448
 449    pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
 450        Bytes::new(self, range, true)
 451    }
 452
 453    pub fn chunks(&self) -> Chunks<'_> {
 454        self.chunks_in_range(0..self.len())
 455    }
 456
 457    pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
 458        Chunks::new(self, range, false)
 459    }
 460
 461    pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
 462        Chunks::new(self, range, true)
 463    }
 464
 465    /// Formats the rope's text with the specified line ending string.
 466    /// This replaces all `\n` characters with the provided line ending.
 467    ///
 468    /// The rope internally stores all line breaks as `\n` (see `Display` impl).
 469    /// Use this method to convert to different line endings for file operations,
 470    /// LSP communication, or other scenarios requiring specific line ending formats.
 471    pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
 472        line_ending.into_string(self.chunks())
 473    }
 474
 475    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 476        if offset >= self.summary().len {
 477            return self.summary().len_utf16;
 478        }
 479        let (start, _, item) =
 480            self.chunks
 481                .find::<Dimensions<usize, OffsetUtf16>, _>((), &offset, Bias::Left);
 482        let overshoot = offset - start.0;
 483        start.1
 484            + item.map_or(Default::default(), |chunk| {
 485                chunk.as_slice().offset_to_offset_utf16(overshoot)
 486            })
 487    }
 488
 489    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
 490        if offset >= self.summary().len_utf16 {
 491            return self.summary().len;
 492        }
 493        let (start, _, item) =
 494            self.chunks
 495                .find::<Dimensions<OffsetUtf16, usize>, _>((), &offset, Bias::Left);
 496        let overshoot = offset - start.0;
 497        start.1
 498            + item.map_or(Default::default(), |chunk| {
 499                chunk.as_slice().offset_utf16_to_offset(overshoot)
 500            })
 501    }
 502
 503    pub fn offset_to_point(&self, offset: usize) -> Point {
 504        if offset >= self.summary().len {
 505            return self.summary().lines;
 506        }
 507        let (start, _, item) =
 508            self.chunks
 509                .find::<Dimensions<usize, Point>, _>((), &offset, Bias::Left);
 510        let overshoot = offset - start.0;
 511        start.1
 512            + item.map_or(Point::zero(), |chunk| {
 513                chunk.as_slice().offset_to_point(overshoot)
 514            })
 515    }
 516
 517    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 518        if offset >= self.summary().len {
 519            return self.summary().lines_utf16();
 520        }
 521        let (start, _, item) =
 522            self.chunks
 523                .find::<Dimensions<usize, PointUtf16>, _>((), &offset, Bias::Left);
 524        let overshoot = offset - start.0;
 525        start.1
 526            + item.map_or(PointUtf16::zero(), |chunk| {
 527                chunk.as_slice().offset_to_point_utf16(overshoot)
 528            })
 529    }
 530
 531    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 532        if point >= self.summary().lines {
 533            return self.summary().lines_utf16();
 534        }
 535        let (start, _, item) =
 536            self.chunks
 537                .find::<Dimensions<Point, PointUtf16>, _>((), &point, Bias::Left);
 538        let overshoot = point - start.0;
 539        start.1
 540            + item.map_or(PointUtf16::zero(), |chunk| {
 541                chunk.as_slice().point_to_point_utf16(overshoot)
 542            })
 543    }
 544
 545    pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
 546        if point >= self.summary().lines_utf16() {
 547            return self.summary().lines;
 548        }
 549        let mut cursor = self.chunks.cursor::<Dimensions<PointUtf16, Point>>(());
 550        cursor.seek(&point, Bias::Left);
 551        let overshoot = point - cursor.start().0;
 552        cursor.start().1
 553            + cursor.item().map_or(Point::zero(), |chunk| {
 554                chunk
 555                    .as_slice()
 556                    .offset_to_point(chunk.as_slice().point_utf16_to_offset(overshoot, false))
 557            })
 558    }
 559
 560    pub fn point_to_offset(&self, point: Point) -> usize {
 561        if point >= self.summary().lines {
 562            return self.summary().len;
 563        }
 564        let (start, _, item) =
 565            self.chunks
 566                .find::<Dimensions<Point, usize>, _>((), &point, Bias::Left);
 567        let overshoot = point - start.0;
 568        start.1 + item.map_or(0, |chunk| chunk.as_slice().point_to_offset(overshoot))
 569    }
 570
 571    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 572        if point >= self.summary().lines {
 573            return self.summary().len_utf16;
 574        }
 575        let mut cursor = self.chunks.cursor::<Dimensions<Point, OffsetUtf16>>(());
 576        cursor.seek(&point, Bias::Left);
 577        let overshoot = point - cursor.start().0;
 578        cursor.start().1
 579            + cursor.item().map_or(OffsetUtf16(0), |chunk| {
 580                chunk.as_slice().point_to_offset_utf16(overshoot)
 581            })
 582    }
 583
 584    pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
 585        self.point_utf16_to_offset_impl(point, false)
 586    }
 587
 588    pub fn point_utf16_to_offset_utf16(&self, point: PointUtf16) -> OffsetUtf16 {
 589        self.point_utf16_to_offset_utf16_impl(point, false)
 590    }
 591
 592    pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
 593        self.point_utf16_to_offset_impl(point.0, true)
 594    }
 595
 596    fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
 597        if point >= self.summary().lines_utf16() {
 598            return self.summary().len;
 599        }
 600        let (start, _, item) =
 601            self.chunks
 602                .find::<Dimensions<PointUtf16, usize>, _>((), &point, Bias::Left);
 603        let overshoot = point - start.0;
 604        start.1
 605            + item.map_or(0, |chunk| {
 606                chunk.as_slice().point_utf16_to_offset(overshoot, clip)
 607            })
 608    }
 609
 610    fn point_utf16_to_offset_utf16_impl(&self, point: PointUtf16, clip: bool) -> OffsetUtf16 {
 611        if point >= self.summary().lines_utf16() {
 612            return self.summary().len_utf16;
 613        }
 614        let mut cursor = self
 615            .chunks
 616            .cursor::<Dimensions<PointUtf16, OffsetUtf16>>(());
 617        cursor.seek(&point, Bias::Left);
 618        let overshoot = point - cursor.start().0;
 619        cursor.start().1
 620            + cursor.item().map_or(OffsetUtf16(0), |chunk| {
 621                chunk
 622                    .as_slice()
 623                    .offset_to_offset_utf16(chunk.as_slice().point_utf16_to_offset(overshoot, clip))
 624            })
 625    }
 626
 627    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 628        if point.0 >= self.summary().lines_utf16() {
 629            return self.summary().lines;
 630        }
 631        let (start, _, item) =
 632            self.chunks
 633                .find::<Dimensions<PointUtf16, Point>, _>((), &point.0, Bias::Left);
 634        let overshoot = Unclipped(point.0 - start.0);
 635        start.1
 636            + item.map_or(Point::zero(), |chunk| {
 637                chunk.as_slice().unclipped_point_utf16_to_point(overshoot)
 638            })
 639    }
 640
 641    pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
 642        match bias {
 643            Bias::Left => self.floor_char_boundary(offset),
 644            Bias::Right => self.ceil_char_boundary(offset),
 645        }
 646    }
 647
 648    pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 649        let (start, _, item) = self.chunks.find::<OffsetUtf16, _>((), &offset, Bias::Right);
 650        if let Some(chunk) = item {
 651            let overshoot = offset - start;
 652            start + chunk.as_slice().clip_offset_utf16(overshoot, bias)
 653        } else {
 654            self.summary().len_utf16
 655        }
 656    }
 657
 658    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 659        let (start, _, item) = self.chunks.find::<Point, _>((), &point, Bias::Right);
 660        if let Some(chunk) = item {
 661            let overshoot = point - start;
 662            start + chunk.as_slice().clip_point(overshoot, bias)
 663        } else {
 664            self.summary().lines
 665        }
 666    }
 667
 668    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 669        let (start, _, item) = self.chunks.find::<PointUtf16, _>((), &point.0, Bias::Right);
 670        if let Some(chunk) = item {
 671            let overshoot = Unclipped(point.0 - start);
 672            start + chunk.as_slice().clip_point_utf16(overshoot, bias)
 673        } else {
 674            self.summary().lines_utf16()
 675        }
 676    }
 677
 678    pub fn line_len(&self, row: u32) -> u32 {
 679        self.clip_point(Point::new(row, u32::MAX), Bias::Left)
 680            .column
 681    }
 682}
 683
 684impl<'a> From<&'a str> for Rope {
 685    fn from(text: &'a str) -> Self {
 686        let mut rope = Self::new();
 687        rope.push(text);
 688        rope
 689    }
 690}
 691
 692impl<'a> FromIterator<&'a str> for Rope {
 693    fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
 694        let mut rope = Rope::new();
 695        for chunk in iter {
 696            rope.push(chunk);
 697        }
 698        rope
 699    }
 700}
 701
 702impl From<String> for Rope {
 703    #[inline(always)]
 704    fn from(text: String) -> Self {
 705        Rope::from(text.as_str())
 706    }
 707}
 708
 709impl From<&String> for Rope {
 710    #[inline(always)]
 711    fn from(text: &String) -> Self {
 712        Rope::from(text.as_str())
 713    }
 714}
 715
 716/// Display implementation for Rope.
 717///
 718/// Note: This always uses `\n` as the line separator, regardless of the original
 719/// file's line endings. The rope internally normalizes all line breaks to `\n`.
 720/// If you need to preserve original line endings (e.g., for LSP communication),
 721/// use `to_string_with_line_ending` instead.
 722impl fmt::Display for Rope {
 723    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 724        for chunk in self.chunks() {
 725            write!(f, "{chunk}")?;
 726        }
 727        Ok(())
 728    }
 729}
 730
 731impl fmt::Debug for Rope {
 732    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 733        use std::fmt::Write as _;
 734
 735        write!(f, "\"")?;
 736        let mut format_string = String::new();
 737        for chunk in self.chunks() {
 738            write!(&mut format_string, "{:?}", chunk)?;
 739            write!(f, "{}", &format_string[1..format_string.len() - 1])?;
 740            format_string.clear();
 741        }
 742        write!(f, "\"")?;
 743        Ok(())
 744    }
 745}
 746
 747pub struct Cursor<'a> {
 748    rope: &'a Rope,
 749    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
 750    offset: usize,
 751}
 752
 753impl<'a> Cursor<'a> {
 754    pub fn new(rope: &'a Rope, offset: usize) -> Self {
 755        let mut chunks = rope.chunks.cursor(());
 756        chunks.seek(&offset, Bias::Right);
 757        Self {
 758            rope,
 759            chunks,
 760            offset,
 761        }
 762    }
 763
 764    pub fn seek_forward(&mut self, end_offset: usize) {
 765        debug_assert!(end_offset >= self.offset);
 766
 767        self.chunks.seek_forward(&end_offset, Bias::Right);
 768        self.offset = end_offset;
 769    }
 770
 771    pub fn slice(&mut self, end_offset: usize) -> Rope {
 772        debug_assert!(
 773            end_offset >= self.offset,
 774            "cannot slice backwards from {} to {}",
 775            self.offset,
 776            end_offset
 777        );
 778
 779        let mut slice = Rope::new();
 780        if let Some(start_chunk) = self.chunks.item() {
 781            let start_ix = self.offset - self.chunks.start();
 782            let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
 783            slice.push_chunk(start_chunk.slice(start_ix..end_ix));
 784        }
 785
 786        if end_offset > self.chunks.end() {
 787            self.chunks.next();
 788            slice.append(Rope {
 789                chunks: self.chunks.slice(&end_offset, Bias::Right),
 790            });
 791            if let Some(end_chunk) = self.chunks.item() {
 792                let end_ix = end_offset - self.chunks.start();
 793                slice.push_chunk(end_chunk.slice(0..end_ix));
 794            }
 795        }
 796
 797        self.offset = end_offset;
 798        slice
 799    }
 800
 801    pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
 802        debug_assert!(end_offset >= self.offset);
 803
 804        let mut summary = D::zero(());
 805        if let Some(start_chunk) = self.chunks.item() {
 806            let start_ix = self.offset - self.chunks.start();
 807            let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
 808            summary.add_assign(&D::from_chunk(start_chunk.slice(start_ix..end_ix)));
 809        }
 810
 811        if end_offset > self.chunks.end() {
 812            self.chunks.next();
 813            summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right));
 814            if let Some(end_chunk) = self.chunks.item() {
 815                let end_ix = end_offset - self.chunks.start();
 816                summary.add_assign(&D::from_chunk(end_chunk.slice(0..end_ix)));
 817            }
 818        }
 819
 820        self.offset = end_offset;
 821        summary
 822    }
 823
 824    pub fn suffix(mut self) -> Rope {
 825        self.slice(self.rope.chunks.extent(()))
 826    }
 827
 828    pub fn offset(&self) -> usize {
 829        self.offset
 830    }
 831}
 832
 833pub struct ChunkBitmaps<'a> {
 834    /// A slice of text up to 128 bytes in size
 835    pub text: &'a str,
 836    /// Bitmap of character locations in text. LSB ordered
 837    pub chars: Bitmap,
 838    /// Bitmap of tab locations in text. LSB ordered
 839    pub tabs: Bitmap,
 840}
 841
 842#[derive(Clone)]
 843pub struct Chunks<'a> {
 844    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
 845    range: Range<usize>,
 846    offset: usize,
 847    reversed: bool,
 848}
 849
 850impl<'a> Chunks<'a> {
 851    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
 852        let mut chunks = rope.chunks.cursor(());
 853        let offset = if reversed {
 854            chunks.seek(&range.end, Bias::Left);
 855            range.end
 856        } else {
 857            chunks.seek(&range.start, Bias::Right);
 858            range.start
 859        };
 860        let chunk_offset = offset - chunks.start();
 861        if let Some(chunk) = chunks.item()
 862            && !chunk.text.is_char_boundary(chunk_offset)
 863        {
 864            panic!("byte index {} is not a char boundary", offset);
 865        }
 866        Self {
 867            chunks,
 868            range,
 869            offset,
 870            reversed,
 871        }
 872    }
 873
 874    fn offset_is_valid(&self) -> bool {
 875        if self.reversed {
 876            if self.offset <= self.range.start || self.offset > self.range.end {
 877                return false;
 878            }
 879        } else if self.offset < self.range.start || self.offset >= self.range.end {
 880            return false;
 881        }
 882
 883        true
 884    }
 885
 886    pub fn offset(&self) -> usize {
 887        self.offset
 888    }
 889
 890    pub fn seek(&mut self, mut offset: usize) {
 891        offset = offset.clamp(self.range.start, self.range.end);
 892
 893        if self.reversed {
 894            if offset > self.chunks.end() {
 895                self.chunks.seek_forward(&offset, Bias::Left);
 896            } else if offset <= *self.chunks.start() {
 897                self.chunks.seek(&offset, Bias::Left);
 898            }
 899        } else {
 900            if offset >= self.chunks.end() {
 901                self.chunks.seek_forward(&offset, Bias::Right);
 902            } else if offset < *self.chunks.start() {
 903                self.chunks.seek(&offset, Bias::Right);
 904            }
 905        };
 906
 907        self.offset = offset;
 908    }
 909
 910    pub fn set_range(&mut self, range: Range<usize>) {
 911        self.range = range.clone();
 912        self.seek(range.start);
 913    }
 914
 915    /// Moves this cursor to the start of the next line in the rope.
 916    ///
 917    /// This method advances the cursor to the beginning of the next line.
 918    /// If the cursor is already at the end of the rope, this method does nothing.
 919    /// Reversed chunks iterators are not currently supported and will panic.
 920    ///
 921    /// Returns `true` if the cursor was successfully moved to the next line start,
 922    /// or `false` if the cursor was already at the end of the rope.
 923    pub fn next_line(&mut self) -> bool {
 924        assert!(!self.reversed);
 925
 926        let mut found = false;
 927        if let Some(chunk) = self.peek() {
 928            if let Some(newline_ix) = chunk.find('\n') {
 929                self.offset += newline_ix + 1;
 930                found = self.offset <= self.range.end;
 931            } else {
 932                self.chunks
 933                    .search_forward(|summary| summary.text.lines.row > 0);
 934                self.offset = *self.chunks.start();
 935
 936                if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
 937                    self.offset += newline_ix + 1;
 938                    found = self.offset <= self.range.end;
 939                } else {
 940                    self.offset = self.chunks.end();
 941                }
 942            }
 943
 944            if self.offset == self.chunks.end() {
 945                self.next();
 946            }
 947        }
 948
 949        if self.offset > self.range.end {
 950            self.offset = cmp::min(self.offset, self.range.end);
 951            self.chunks.seek(&self.offset, Bias::Right);
 952        }
 953
 954        found
 955    }
 956
 957    /// Move this cursor to the preceding position in the rope that starts a new line.
 958    /// Reversed chunks iterators are not currently supported and will panic.
 959    ///
 960    /// If this cursor is not on the start of a line, it will be moved to the start of
 961    /// its current line. Otherwise it will be moved to the start of the previous line.
 962    /// It updates the cursor's position and returns true if a previous line was found,
 963    /// or false if the cursor was already at the start of the rope.
 964    pub fn prev_line(&mut self) -> bool {
 965        assert!(!self.reversed);
 966
 967        let initial_offset = self.offset;
 968
 969        if self.offset == *self.chunks.start() {
 970            self.chunks.prev();
 971        }
 972
 973        if let Some(chunk) = self.chunks.item() {
 974            let mut end_ix = self.offset - *self.chunks.start();
 975            if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
 976                end_ix -= 1;
 977            }
 978
 979            if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
 980                self.offset = *self.chunks.start() + newline_ix + 1;
 981                if self.offset_is_valid() {
 982                    return true;
 983                }
 984            }
 985        }
 986
 987        self.chunks
 988            .search_backward(|summary| summary.text.lines.row > 0);
 989        self.offset = *self.chunks.start();
 990        if let Some(chunk) = self.chunks.item()
 991            && let Some(newline_ix) = chunk.text.rfind('\n')
 992        {
 993            self.offset += newline_ix + 1;
 994            if self.offset_is_valid() {
 995                if self.offset == self.chunks.end() {
 996                    self.chunks.next();
 997                }
 998
 999                return true;
1000            }
1001        }
1002
1003        if !self.offset_is_valid() || self.chunks.item().is_none() {
1004            self.offset = self.range.start;
1005            self.chunks.seek(&self.offset, Bias::Right);
1006        }
1007
1008        self.offset < initial_offset && self.offset == 0
1009    }
1010
1011    pub fn peek(&self) -> Option<&'a str> {
1012        if !self.offset_is_valid() {
1013            return None;
1014        }
1015
1016        let chunk = self.chunks.item()?;
1017        let chunk_start = *self.chunks.start();
1018        let slice_range = if self.reversed {
1019            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1020            let slice_end = self.offset - chunk_start;
1021            slice_start..slice_end
1022        } else {
1023            let slice_start = self.offset - chunk_start;
1024            let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1025            slice_start..slice_end
1026        };
1027
1028        Some(&chunk.text[slice_range])
1029    }
1030
1031    /// Returns bitmaps that represent character positions and tab positions
1032    pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
1033        if !self.offset_is_valid() {
1034            return None;
1035        }
1036
1037        let chunk = self.chunks.item()?;
1038        let chunk_start = *self.chunks.start();
1039        let slice_range = if self.reversed {
1040            let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1041            let slice_end = self.offset - chunk_start;
1042            slice_start..slice_end
1043        } else {
1044            let slice_start = self.offset - chunk_start;
1045            let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1046            slice_start..slice_end
1047        };
1048        let chunk_start_offset = slice_range.start;
1049        let slice_text = &chunk.text[slice_range];
1050
1051        // Shift the tabs to align with our slice window
1052        let shifted_tabs = chunk.tabs() >> chunk_start_offset;
1053        let shifted_chars = chunk.chars() >> chunk_start_offset;
1054
1055        Some(ChunkBitmaps {
1056            text: slice_text,
1057            chars: shifted_chars,
1058            tabs: shifted_tabs,
1059        })
1060    }
1061
1062    pub fn lines(self) -> Lines<'a> {
1063        let reversed = self.reversed;
1064        Lines {
1065            chunks: self,
1066            current_line: String::new(),
1067            done: false,
1068            reversed,
1069        }
1070    }
1071
1072    pub fn equals_str(&self, other: &str) -> bool {
1073        let chunk = self.clone();
1074        if chunk.reversed {
1075            let mut offset = other.len();
1076            for chunk in chunk {
1077                if other[0..offset].ends_with(chunk) {
1078                    offset -= chunk.len();
1079                } else {
1080                    return false;
1081                }
1082            }
1083            if offset != 0 {
1084                return false;
1085            }
1086        } else {
1087            let mut offset = 0;
1088            for chunk in chunk {
1089                if offset >= other.len() {
1090                    return false;
1091                }
1092                if other[offset..].starts_with(chunk) {
1093                    offset += chunk.len();
1094                } else {
1095                    return false;
1096                }
1097            }
1098            if offset != other.len() {
1099                return false;
1100            }
1101        }
1102
1103        true
1104    }
1105}
1106
1107pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>);
1108
1109impl<'a> Iterator for ChunkWithBitmaps<'a> {
1110    /// text, chars bitmap, tabs bitmap
1111    type Item = ChunkBitmaps<'a>;
1112
1113    fn next(&mut self) -> Option<Self::Item> {
1114        let chunk_bitmaps = self.0.peek_with_bitmaps()?;
1115        if self.0.reversed {
1116            self.0.offset -= chunk_bitmaps.text.len();
1117            if self.0.offset <= *self.0.chunks.start() {
1118                self.0.chunks.prev();
1119            }
1120        } else {
1121            self.0.offset += chunk_bitmaps.text.len();
1122            if self.0.offset >= self.0.chunks.end() {
1123                self.0.chunks.next();
1124            }
1125        }
1126
1127        Some(chunk_bitmaps)
1128    }
1129}
1130
1131impl<'a> Iterator for Chunks<'a> {
1132    type Item = &'a str;
1133
1134    fn next(&mut self) -> Option<Self::Item> {
1135        let chunk = self.peek()?;
1136        if self.reversed {
1137            self.offset -= chunk.len();
1138            if self.offset <= *self.chunks.start() {
1139                self.chunks.prev();
1140            }
1141        } else {
1142            self.offset += chunk.len();
1143            if self.offset >= self.chunks.end() {
1144                self.chunks.next();
1145            }
1146        }
1147
1148        Some(chunk)
1149    }
1150}
1151
1152pub struct Bytes<'a> {
1153    chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
1154    range: Range<usize>,
1155    reversed: bool,
1156}
1157
1158impl<'a> Bytes<'a> {
1159    pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
1160        let mut chunks = rope.chunks.cursor(());
1161        if reversed {
1162            chunks.seek(&range.end, Bias::Left);
1163        } else {
1164            chunks.seek(&range.start, Bias::Right);
1165        }
1166        Self {
1167            chunks,
1168            range,
1169            reversed,
1170        }
1171    }
1172
1173    pub fn peek(&self) -> Option<&'a [u8]> {
1174        let chunk = self.chunks.item()?;
1175        if self.reversed && self.range.start >= self.chunks.end() {
1176            return None;
1177        }
1178        let chunk_start = *self.chunks.start();
1179        if self.range.end <= chunk_start {
1180            return None;
1181        }
1182        let start = self.range.start.saturating_sub(chunk_start);
1183        let end = self.range.end - chunk_start;
1184        Some(&chunk.text.as_bytes()[start..chunk.text.len().min(end)])
1185    }
1186}
1187
1188impl<'a> Iterator for Bytes<'a> {
1189    type Item = &'a [u8];
1190
1191    fn next(&mut self) -> Option<Self::Item> {
1192        let result = self.peek();
1193        if result.is_some() {
1194            if self.reversed {
1195                self.chunks.prev();
1196            } else {
1197                self.chunks.next();
1198            }
1199        }
1200        result
1201    }
1202}
1203
1204impl io::Read for Bytes<'_> {
1205    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1206        if let Some(chunk) = self.peek() {
1207            let len = cmp::min(buf.len(), chunk.len());
1208            if self.reversed {
1209                buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
1210                buf[..len].reverse();
1211                self.range.end -= len;
1212            } else {
1213                buf[..len].copy_from_slice(&chunk[..len]);
1214                self.range.start += len;
1215            }
1216
1217            if len == chunk.len() {
1218                if self.reversed {
1219                    self.chunks.prev();
1220                } else {
1221                    self.chunks.next();
1222                }
1223            }
1224            Ok(len)
1225        } else {
1226            Ok(0)
1227        }
1228    }
1229}
1230
1231pub struct Lines<'a> {
1232    chunks: Chunks<'a>,
1233    current_line: String,
1234    done: bool,
1235    reversed: bool,
1236}
1237
1238impl<'a> Lines<'a> {
1239    pub fn next(&mut self) -> Option<&str> {
1240        if self.done {
1241            return None;
1242        }
1243
1244        self.current_line.clear();
1245
1246        while let Some(chunk) = self.chunks.peek() {
1247            let chunk_lines = chunk.split('\n');
1248            if self.reversed {
1249                let mut chunk_lines = chunk_lines.rev().peekable();
1250                if let Some(chunk_line) = chunk_lines.next() {
1251                    let done = chunk_lines.peek().is_some();
1252                    if done {
1253                        self.chunks
1254                            .seek(self.chunks.offset() - chunk_line.len() - "\n".len());
1255                        if self.current_line.is_empty() {
1256                            return Some(chunk_line);
1257                        }
1258                    }
1259                    self.current_line.insert_str(0, chunk_line);
1260                    if done {
1261                        return Some(&self.current_line);
1262                    }
1263                }
1264            } else {
1265                let mut chunk_lines = chunk_lines.peekable();
1266                if let Some(chunk_line) = chunk_lines.next() {
1267                    let done = chunk_lines.peek().is_some();
1268                    if done {
1269                        self.chunks
1270                            .seek(self.chunks.offset() + chunk_line.len() + "\n".len());
1271                        if self.current_line.is_empty() {
1272                            return Some(chunk_line);
1273                        }
1274                    }
1275                    self.current_line.push_str(chunk_line);
1276                    if done {
1277                        return Some(&self.current_line);
1278                    }
1279                }
1280            }
1281
1282            self.chunks.next();
1283        }
1284
1285        self.done = true;
1286        Some(&self.current_line)
1287    }
1288
1289    pub fn seek(&mut self, offset: usize) {
1290        self.chunks.seek(offset);
1291        self.current_line.clear();
1292        self.done = false;
1293    }
1294
1295    pub fn offset(&self) -> usize {
1296        self.chunks.offset()
1297    }
1298}
1299
1300impl sum_tree::Item for Chunk {
1301    type Summary = ChunkSummary;
1302
1303    fn summary(&self, _cx: ()) -> Self::Summary {
1304        ChunkSummary {
1305            text: self.as_slice().text_summary(),
1306        }
1307    }
1308}
1309
1310#[derive(Clone, Debug, Default, Eq, PartialEq)]
1311pub struct ChunkSummary {
1312    text: TextSummary,
1313}
1314
1315impl sum_tree::ContextLessSummary for ChunkSummary {
1316    fn zero() -> Self {
1317        Default::default()
1318    }
1319
1320    fn add_summary(&mut self, summary: &Self) {
1321        self.text += &summary.text;
1322    }
1323}
1324
1325/// Summary of a string of text.
1326#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
1327pub struct TextSummary {
1328    /// Length in bytes.
1329    pub len: usize,
1330    /// Length in UTF-8.
1331    pub chars: usize,
1332    /// Length in UTF-16 code units
1333    pub len_utf16: OffsetUtf16,
1334    /// A point representing the number of lines and the length of the last line.
1335    ///
1336    /// In other words, it marks the point after the last byte in the text, (if
1337    /// EOF was a character, this would be its position).
1338    pub lines: Point,
1339    /// How many `char`s are in the first line
1340    pub first_line_chars: u32,
1341    /// How many `char`s are in the last line
1342    pub last_line_chars: u32,
1343    /// How many UTF-16 code units are in the last line
1344    pub last_line_len_utf16: u32,
1345    /// The row idx of the longest row
1346    pub longest_row: u32,
1347    /// How many `char`s are in the longest row
1348    pub longest_row_chars: u32,
1349}
1350
1351impl TextSummary {
1352    pub fn lines_utf16(&self) -> PointUtf16 {
1353        PointUtf16 {
1354            row: self.lines.row,
1355            column: self.last_line_len_utf16,
1356        }
1357    }
1358
1359    pub fn newline() -> Self {
1360        Self {
1361            len: 1,
1362            chars: 1,
1363            len_utf16: OffsetUtf16(1),
1364            first_line_chars: 0,
1365            last_line_chars: 0,
1366            last_line_len_utf16: 0,
1367            lines: Point::new(1, 0),
1368            longest_row: 0,
1369            longest_row_chars: 0,
1370        }
1371    }
1372
1373    pub fn add_newline(&mut self) {
1374        self.len += 1;
1375        self.len_utf16 += OffsetUtf16(self.len_utf16.0 + 1);
1376        self.last_line_chars = 0;
1377        self.last_line_len_utf16 = 0;
1378        self.lines += Point::new(1, 0);
1379    }
1380}
1381
1382impl<'a> From<&'a str> for TextSummary {
1383    fn from(text: &'a str) -> Self {
1384        let mut len_utf16 = OffsetUtf16(0);
1385        let mut lines = Point::new(0, 0);
1386        let mut first_line_chars = 0;
1387        let mut last_line_chars = 0;
1388        let mut last_line_len_utf16 = 0;
1389        let mut longest_row = 0;
1390        let mut longest_row_chars = 0;
1391        let mut chars = 0;
1392        for c in text.chars() {
1393            chars += 1;
1394            len_utf16.0 += c.len_utf16();
1395
1396            if c == '\n' {
1397                lines += Point::new(1, 0);
1398                last_line_len_utf16 = 0;
1399                last_line_chars = 0;
1400            } else {
1401                lines.column += c.len_utf8() as u32;
1402                last_line_len_utf16 += c.len_utf16() as u32;
1403                last_line_chars += 1;
1404            }
1405
1406            if lines.row == 0 {
1407                first_line_chars = last_line_chars;
1408            }
1409
1410            if last_line_chars > longest_row_chars {
1411                longest_row = lines.row;
1412                longest_row_chars = last_line_chars;
1413            }
1414        }
1415
1416        TextSummary {
1417            len: text.len(),
1418            chars,
1419            len_utf16,
1420            lines,
1421            first_line_chars,
1422            last_line_chars,
1423            last_line_len_utf16,
1424            longest_row,
1425            longest_row_chars,
1426        }
1427    }
1428}
1429
1430impl sum_tree::ContextLessSummary for TextSummary {
1431    fn zero() -> Self {
1432        Default::default()
1433    }
1434
1435    fn add_summary(&mut self, summary: &Self) {
1436        *self += summary;
1437    }
1438}
1439
1440impl ops::Add<Self> for TextSummary {
1441    type Output = Self;
1442
1443    fn add(mut self, rhs: Self) -> Self::Output {
1444        AddAssign::add_assign(&mut self, &rhs);
1445        self
1446    }
1447}
1448
1449impl<'a> ops::AddAssign<&'a Self> for TextSummary {
1450    fn add_assign(&mut self, other: &'a Self) {
1451        let joined_chars = self.last_line_chars + other.first_line_chars;
1452        if joined_chars > self.longest_row_chars {
1453            self.longest_row = self.lines.row;
1454            self.longest_row_chars = joined_chars;
1455        }
1456        if other.longest_row_chars > self.longest_row_chars {
1457            self.longest_row = self.lines.row + other.longest_row;
1458            self.longest_row_chars = other.longest_row_chars;
1459        }
1460
1461        if self.lines.row == 0 {
1462            self.first_line_chars += other.first_line_chars;
1463        }
1464
1465        if other.lines.row == 0 {
1466            self.last_line_chars += other.first_line_chars;
1467            self.last_line_len_utf16 += other.last_line_len_utf16;
1468        } else {
1469            self.last_line_chars = other.last_line_chars;
1470            self.last_line_len_utf16 = other.last_line_len_utf16;
1471        }
1472
1473        self.chars += other.chars;
1474        self.len += other.len;
1475        self.len_utf16 += other.len_utf16;
1476        self.lines += other.lines;
1477    }
1478}
1479
1480impl ops::AddAssign<Self> for TextSummary {
1481    fn add_assign(&mut self, other: Self) {
1482        *self += &other;
1483    }
1484}
1485
1486pub trait TextDimension:
1487    'static + Clone + Copy + Default + for<'a> Dimension<'a, ChunkSummary> + std::fmt::Debug
1488{
1489    fn from_text_summary(summary: &TextSummary) -> Self;
1490    fn from_chunk(chunk: ChunkSlice) -> Self;
1491    fn add_assign(&mut self, other: &Self);
1492}
1493
1494impl<D1: TextDimension, D2: TextDimension> TextDimension for Dimensions<D1, D2, ()> {
1495    fn from_text_summary(summary: &TextSummary) -> Self {
1496        Dimensions(
1497            D1::from_text_summary(summary),
1498            D2::from_text_summary(summary),
1499            (),
1500        )
1501    }
1502
1503    fn from_chunk(chunk: ChunkSlice) -> Self {
1504        Dimensions(D1::from_chunk(chunk), D2::from_chunk(chunk), ())
1505    }
1506
1507    fn add_assign(&mut self, other: &Self) {
1508        self.0.add_assign(&other.0);
1509        self.1.add_assign(&other.1);
1510    }
1511}
1512
1513impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1514    fn zero(_cx: ()) -> Self {
1515        Default::default()
1516    }
1517
1518    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1519        *self += &summary.text;
1520    }
1521}
1522
1523impl TextDimension for TextSummary {
1524    fn from_text_summary(summary: &TextSummary) -> Self {
1525        *summary
1526    }
1527
1528    fn from_chunk(chunk: ChunkSlice) -> Self {
1529        chunk.text_summary()
1530    }
1531
1532    fn add_assign(&mut self, other: &Self) {
1533        *self += other;
1534    }
1535}
1536
1537impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1538    fn zero(_cx: ()) -> Self {
1539        Default::default()
1540    }
1541
1542    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1543        *self += summary.text.len;
1544    }
1545}
1546
1547impl TextDimension for usize {
1548    fn from_text_summary(summary: &TextSummary) -> Self {
1549        summary.len
1550    }
1551
1552    fn from_chunk(chunk: ChunkSlice) -> Self {
1553        chunk.len()
1554    }
1555
1556    fn add_assign(&mut self, other: &Self) {
1557        *self += other;
1558    }
1559}
1560
1561impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1562    fn zero(_cx: ()) -> Self {
1563        Default::default()
1564    }
1565
1566    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1567        *self += summary.text.len_utf16;
1568    }
1569}
1570
1571impl TextDimension for OffsetUtf16 {
1572    fn from_text_summary(summary: &TextSummary) -> Self {
1573        summary.len_utf16
1574    }
1575
1576    fn from_chunk(chunk: ChunkSlice) -> Self {
1577        chunk.len_utf16()
1578    }
1579
1580    fn add_assign(&mut self, other: &Self) {
1581        *self += other;
1582    }
1583}
1584
1585impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1586    fn zero(_cx: ()) -> Self {
1587        Default::default()
1588    }
1589
1590    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1591        *self += summary.text.lines;
1592    }
1593}
1594
1595impl TextDimension for Point {
1596    fn from_text_summary(summary: &TextSummary) -> Self {
1597        summary.lines
1598    }
1599
1600    fn from_chunk(chunk: ChunkSlice) -> Self {
1601        chunk.lines()
1602    }
1603
1604    fn add_assign(&mut self, other: &Self) {
1605        *self += other;
1606    }
1607}
1608
1609impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1610    fn zero(_cx: ()) -> Self {
1611        Default::default()
1612    }
1613
1614    fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1615        *self += summary.text.lines_utf16();
1616    }
1617}
1618
1619impl TextDimension for PointUtf16 {
1620    fn from_text_summary(summary: &TextSummary) -> Self {
1621        summary.lines_utf16()
1622    }
1623
1624    fn from_chunk(chunk: ChunkSlice) -> Self {
1625        PointUtf16 {
1626            row: chunk.lines().row,
1627            column: chunk.last_line_len_utf16(),
1628        }
1629    }
1630
1631    fn add_assign(&mut self, other: &Self) {
1632        *self += other;
1633    }
1634}
1635
1636/// A pair of text dimensions in which only the first dimension is used for comparison,
1637/// but both dimensions are updated during addition and subtraction.
1638#[derive(Clone, Copy, Debug)]
1639pub struct DimensionPair<K, V> {
1640    pub key: K,
1641    pub value: Option<V>,
1642}
1643
1644impl<K: Default, V: Default> Default for DimensionPair<K, V> {
1645    fn default() -> Self {
1646        Self {
1647            key: Default::default(),
1648            value: Some(Default::default()),
1649        }
1650    }
1651}
1652
1653impl<K, V> cmp::Ord for DimensionPair<K, V>
1654where
1655    K: cmp::Ord,
1656{
1657    fn cmp(&self, other: &Self) -> cmp::Ordering {
1658        self.key.cmp(&other.key)
1659    }
1660}
1661
1662impl<K, V> cmp::PartialOrd for DimensionPair<K, V>
1663where
1664    K: cmp::PartialOrd,
1665{
1666    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1667        self.key.partial_cmp(&other.key)
1668    }
1669}
1670
1671impl<K, V> cmp::PartialEq for DimensionPair<K, V>
1672where
1673    K: cmp::PartialEq,
1674{
1675    fn eq(&self, other: &Self) -> bool {
1676        self.key.eq(&other.key)
1677    }
1678}
1679
1680impl<K, V> ops::Sub for DimensionPair<K, V>
1681where
1682    K: ops::Sub<K, Output = K>,
1683    V: ops::Sub<V, Output = V>,
1684{
1685    type Output = Self;
1686
1687    fn sub(self, rhs: Self) -> Self::Output {
1688        Self {
1689            key: self.key - rhs.key,
1690            value: self.value.zip(rhs.value).map(|(a, b)| a - b),
1691        }
1692    }
1693}
1694
1695impl<K, V> cmp::Eq for DimensionPair<K, V> where K: cmp::Eq {}
1696
1697impl<'a, K, V> sum_tree::Dimension<'a, ChunkSummary> for DimensionPair<K, V>
1698where
1699    K: sum_tree::Dimension<'a, ChunkSummary>,
1700    V: sum_tree::Dimension<'a, ChunkSummary>,
1701{
1702    fn zero(_cx: ()) -> Self {
1703        Self {
1704            key: K::zero(_cx),
1705            value: Some(V::zero(_cx)),
1706        }
1707    }
1708
1709    fn add_summary(&mut self, summary: &'a ChunkSummary, _cx: ()) {
1710        self.key.add_summary(summary, _cx);
1711        if let Some(value) = &mut self.value {
1712            value.add_summary(summary, _cx);
1713        }
1714    }
1715}
1716
1717impl<K, V> TextDimension for DimensionPair<K, V>
1718where
1719    K: TextDimension,
1720    V: TextDimension,
1721{
1722    fn add_assign(&mut self, other: &Self) {
1723        self.key.add_assign(&other.key);
1724        if let Some(value) = &mut self.value {
1725            if let Some(other_value) = other.value.as_ref() {
1726                value.add_assign(other_value);
1727            } else {
1728                self.value.take();
1729            }
1730        }
1731    }
1732
1733    fn from_chunk(chunk: ChunkSlice) -> Self {
1734        Self {
1735            key: K::from_chunk(chunk),
1736            value: Some(V::from_chunk(chunk)),
1737        }
1738    }
1739
1740    fn from_text_summary(summary: &TextSummary) -> Self {
1741        Self {
1742            key: K::from_text_summary(summary),
1743            value: Some(V::from_text_summary(summary)),
1744        }
1745    }
1746}
1747
1748#[cfg(test)]
1749mod tests {
1750    use super::*;
1751    use Bias::{Left, Right};
1752    use rand::prelude::*;
1753    use std::{cmp::Ordering, env, io::Read};
1754    use util::RandomCharIter;
1755
1756    #[ctor::ctor]
1757    fn init_logger() {
1758        zlog::init_test();
1759    }
1760
1761    #[test]
1762    fn test_all_4_byte_chars() {
1763        let mut rope = Rope::new();
1764        let text = "🏀".repeat(256);
1765        rope.push(&text);
1766        assert_eq!(rope.text(), text);
1767    }
1768
1769    #[test]
1770    fn test_clip() {
1771        let rope = Rope::from("🧘");
1772
1773        assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1774        assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1775        assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1776
1777        assert_eq!(
1778            rope.clip_point(Point::new(0, 1), Bias::Left),
1779            Point::new(0, 0)
1780        );
1781        assert_eq!(
1782            rope.clip_point(Point::new(0, 1), Bias::Right),
1783            Point::new(0, 4)
1784        );
1785        assert_eq!(
1786            rope.clip_point(Point::new(0, 5), Bias::Right),
1787            Point::new(0, 4)
1788        );
1789
1790        assert_eq!(
1791            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1792            PointUtf16::new(0, 0)
1793        );
1794        assert_eq!(
1795            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1796            PointUtf16::new(0, 2)
1797        );
1798        assert_eq!(
1799            rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1800            PointUtf16::new(0, 2)
1801        );
1802
1803        assert_eq!(
1804            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1805            OffsetUtf16(0)
1806        );
1807        assert_eq!(
1808            rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1809            OffsetUtf16(2)
1810        );
1811        assert_eq!(
1812            rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1813            OffsetUtf16(2)
1814        );
1815    }
1816
1817    #[test]
1818    fn test_prev_next_line() {
1819        let rope = Rope::from("abc\ndef\nghi\njkl");
1820
1821        let mut chunks = rope.chunks();
1822        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1823
1824        assert!(chunks.next_line());
1825        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1826
1827        assert!(chunks.next_line());
1828        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1829
1830        assert!(chunks.next_line());
1831        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1832
1833        assert!(!chunks.next_line());
1834        assert_eq!(chunks.peek(), None);
1835
1836        assert!(chunks.prev_line());
1837        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1838
1839        assert!(chunks.prev_line());
1840        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1841
1842        assert!(chunks.prev_line());
1843        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1844
1845        assert!(chunks.prev_line());
1846        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1847
1848        assert!(!chunks.prev_line());
1849        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1850
1851        // Only return true when the cursor has moved to the start of a line
1852        let mut chunks = rope.chunks_in_range(5..7);
1853        chunks.seek(6);
1854        assert!(!chunks.prev_line());
1855        assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
1856
1857        assert!(!chunks.next_line());
1858        assert_eq!(chunks.peek(), None);
1859    }
1860
1861    #[test]
1862    fn test_lines() {
1863        let rope = Rope::from("abc\ndefg\nhi");
1864        let mut lines = rope.chunks().lines();
1865        assert_eq!(lines.next(), Some("abc"));
1866        assert_eq!(lines.next(), Some("defg"));
1867        assert_eq!(lines.next(), Some("hi"));
1868        assert_eq!(lines.next(), None);
1869
1870        let rope = Rope::from("abc\ndefg\nhi\n");
1871        let mut lines = rope.chunks().lines();
1872        assert_eq!(lines.next(), Some("abc"));
1873        assert_eq!(lines.next(), Some("defg"));
1874        assert_eq!(lines.next(), Some("hi"));
1875        assert_eq!(lines.next(), Some(""));
1876        assert_eq!(lines.next(), None);
1877
1878        let rope = Rope::from("abc\ndefg\nhi");
1879        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1880        assert_eq!(lines.next(), Some("hi"));
1881        assert_eq!(lines.next(), Some("defg"));
1882        assert_eq!(lines.next(), Some("abc"));
1883        assert_eq!(lines.next(), None);
1884
1885        let rope = Rope::from("abc\ndefg\nhi\n");
1886        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1887        assert_eq!(lines.next(), Some(""));
1888        assert_eq!(lines.next(), Some("hi"));
1889        assert_eq!(lines.next(), Some("defg"));
1890        assert_eq!(lines.next(), Some("abc"));
1891        assert_eq!(lines.next(), None);
1892
1893        let rope = Rope::from("abc\nlonger line test\nhi");
1894        let mut lines = rope.chunks().lines();
1895        assert_eq!(lines.next(), Some("abc"));
1896        assert_eq!(lines.next(), Some("longer line test"));
1897        assert_eq!(lines.next(), Some("hi"));
1898        assert_eq!(lines.next(), None);
1899
1900        let rope = Rope::from("abc\nlonger line test\nhi");
1901        let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1902        assert_eq!(lines.next(), Some("hi"));
1903        assert_eq!(lines.next(), Some("longer line test"));
1904        assert_eq!(lines.next(), Some("abc"));
1905        assert_eq!(lines.next(), None);
1906    }
1907
1908    #[gpui::test(iterations = 100)]
1909    fn test_random_rope(mut rng: StdRng) {
1910        let operations = env::var("OPERATIONS")
1911            .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1912            .unwrap_or(10);
1913
1914        let mut expected = String::new();
1915        let mut actual = Rope::new();
1916        for _ in 0..operations {
1917            let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1918            let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1919            let len = rng.random_range(0..=64);
1920            let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1921
1922            let mut new_actual = Rope::new();
1923            let mut cursor = actual.cursor(0);
1924            new_actual.append(cursor.slice(start_ix));
1925            new_actual.push(&new_text);
1926            cursor.seek_forward(end_ix);
1927            new_actual.append(cursor.suffix());
1928            actual = new_actual;
1929
1930            expected.replace_range(start_ix..end_ix, &new_text);
1931
1932            assert_eq!(actual.text(), expected);
1933            log::info!("text: {:?}", expected);
1934
1935            for _ in 0..5 {
1936                let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1937                let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1938
1939                let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1940                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1941
1942                let mut actual_text = String::new();
1943                actual
1944                    .bytes_in_range(start_ix..end_ix)
1945                    .read_to_string(&mut actual_text)
1946                    .unwrap();
1947                assert_eq!(actual_text, &expected[start_ix..end_ix]);
1948
1949                assert_eq!(
1950                    actual
1951                        .reversed_chunks_in_range(start_ix..end_ix)
1952                        .collect::<Vec<&str>>()
1953                        .into_iter()
1954                        .rev()
1955                        .collect::<String>(),
1956                    &expected[start_ix..end_ix]
1957                );
1958
1959                let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
1960                    .match_indices('\n')
1961                    .map(|(index, _)| start_ix + index + 1)
1962                    .collect();
1963
1964                let mut chunks = actual.chunks_in_range(start_ix..end_ix);
1965
1966                let mut actual_line_starts = Vec::new();
1967                while chunks.next_line() {
1968                    actual_line_starts.push(chunks.offset());
1969                }
1970                assert_eq!(
1971                    actual_line_starts,
1972                    expected_line_starts,
1973                    "actual line starts != expected line starts when using next_line() for {:?} ({:?})",
1974                    &expected[start_ix..end_ix],
1975                    start_ix..end_ix
1976                );
1977
1978                if start_ix < end_ix
1979                    && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
1980                {
1981                    expected_line_starts.insert(0, start_ix);
1982                }
1983                // Remove the last index if it starts at the end of the range.
1984                if expected_line_starts.last() == Some(&end_ix) {
1985                    expected_line_starts.pop();
1986                }
1987
1988                let mut actual_line_starts = Vec::new();
1989                while chunks.prev_line() {
1990                    actual_line_starts.push(chunks.offset());
1991                }
1992                actual_line_starts.reverse();
1993                assert_eq!(
1994                    actual_line_starts,
1995                    expected_line_starts,
1996                    "actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
1997                    &expected[start_ix..end_ix],
1998                    start_ix..end_ix
1999                );
2000
2001                // Check that next_line/prev_line work correctly from random positions
2002                let mut offset = rng.random_range(start_ix..=end_ix);
2003                while !expected.is_char_boundary(offset) {
2004                    offset -= 1;
2005                }
2006                chunks.seek(offset);
2007
2008                for _ in 0..5 {
2009                    if rng.random() {
2010                        let expected_next_line_start = expected[offset..end_ix]
2011                            .find('\n')
2012                            .map(|newline_ix| offset + newline_ix + 1);
2013
2014                        let moved = chunks.next_line();
2015                        assert_eq!(
2016                            moved,
2017                            expected_next_line_start.is_some(),
2018                            "unexpected result from next_line after seeking to {} in range {:?} ({:?})",
2019                            offset,
2020                            start_ix..end_ix,
2021                            &expected[start_ix..end_ix]
2022                        );
2023                        if let Some(expected_next_line_start) = expected_next_line_start {
2024                            assert_eq!(
2025                                chunks.offset(),
2026                                expected_next_line_start,
2027                                "invalid position after seeking to {} in range {:?} ({:?})",
2028                                offset,
2029                                start_ix..end_ix,
2030                                &expected[start_ix..end_ix]
2031                            );
2032                        } else {
2033                            assert_eq!(
2034                                chunks.offset(),
2035                                end_ix,
2036                                "invalid position after seeking to {} in range {:?} ({:?})",
2037                                offset,
2038                                start_ix..end_ix,
2039                                &expected[start_ix..end_ix]
2040                            );
2041                        }
2042                    } else {
2043                        let search_end = if offset > 0 && expected.as_bytes()[offset - 1] == b'\n' {
2044                            offset - 1
2045                        } else {
2046                            offset
2047                        };
2048
2049                        let expected_prev_line_start = expected[..search_end]
2050                            .rfind('\n')
2051                            .and_then(|newline_ix| {
2052                                let line_start_ix = newline_ix + 1;
2053                                if line_start_ix >= start_ix {
2054                                    Some(line_start_ix)
2055                                } else {
2056                                    None
2057                                }
2058                            })
2059                            .or({
2060                                if offset > 0 && start_ix == 0 {
2061                                    Some(0)
2062                                } else {
2063                                    None
2064                                }
2065                            });
2066
2067                        let moved = chunks.prev_line();
2068                        assert_eq!(
2069                            moved,
2070                            expected_prev_line_start.is_some(),
2071                            "unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
2072                            offset,
2073                            start_ix..end_ix,
2074                            &expected[start_ix..end_ix]
2075                        );
2076                        if let Some(expected_prev_line_start) = expected_prev_line_start {
2077                            assert_eq!(
2078                                chunks.offset(),
2079                                expected_prev_line_start,
2080                                "invalid position after seeking to {} in range {:?} ({:?})",
2081                                offset,
2082                                start_ix..end_ix,
2083                                &expected[start_ix..end_ix]
2084                            );
2085                        } else {
2086                            assert_eq!(
2087                                chunks.offset(),
2088                                start_ix,
2089                                "invalid position after seeking to {} in range {:?} ({:?})",
2090                                offset,
2091                                start_ix..end_ix,
2092                                &expected[start_ix..end_ix]
2093                            );
2094                        }
2095                    }
2096
2097                    assert!((start_ix..=end_ix).contains(&chunks.offset()));
2098                    if rng.random() {
2099                        offset = rng.random_range(start_ix..=end_ix);
2100                        while !expected.is_char_boundary(offset) {
2101                            offset -= 1;
2102                        }
2103                        chunks.seek(offset);
2104                    } else {
2105                        chunks.next();
2106                        offset = chunks.offset();
2107                        assert!((start_ix..=end_ix).contains(&chunks.offset()));
2108                    }
2109                }
2110            }
2111
2112            let mut offset_utf16 = OffsetUtf16(0);
2113            let mut point = Point::new(0, 0);
2114            let mut point_utf16 = PointUtf16::new(0, 0);
2115            for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
2116                assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
2117                assert_eq!(
2118                    actual.offset_to_point_utf16(ix),
2119                    point_utf16,
2120                    "offset_to_point_utf16({})",
2121                    ix
2122                );
2123                assert_eq!(
2124                    actual.point_to_offset(point),
2125                    ix,
2126                    "point_to_offset({:?})",
2127                    point
2128                );
2129                assert_eq!(
2130                    actual.point_utf16_to_offset(point_utf16),
2131                    ix,
2132                    "point_utf16_to_offset({:?})",
2133                    point_utf16
2134                );
2135                assert_eq!(
2136                    actual.offset_to_offset_utf16(ix),
2137                    offset_utf16,
2138                    "offset_to_offset_utf16({:?})",
2139                    ix
2140                );
2141                assert_eq!(
2142                    actual.offset_utf16_to_offset(offset_utf16),
2143                    ix,
2144                    "offset_utf16_to_offset({:?})",
2145                    offset_utf16
2146                );
2147                if ch == '\n' {
2148                    point += Point::new(1, 0);
2149                    point_utf16 += PointUtf16::new(1, 0);
2150                } else {
2151                    point.column += ch.len_utf8() as u32;
2152                    point_utf16.column += ch.len_utf16() as u32;
2153                }
2154                offset_utf16.0 += ch.len_utf16();
2155            }
2156
2157            let mut offset_utf16 = OffsetUtf16(0);
2158            let mut point_utf16 = Unclipped(PointUtf16::zero());
2159            for unit in expected.encode_utf16() {
2160                let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
2161                let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
2162                assert!(right_offset >= left_offset);
2163                // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
2164                actual.offset_utf16_to_offset(left_offset);
2165                actual.offset_utf16_to_offset(right_offset);
2166
2167                let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
2168                let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
2169                assert!(right_point >= left_point);
2170                // Ensure translating valid UTF-16 points to offsets doesn't panic.
2171                actual.point_utf16_to_offset(left_point);
2172                actual.point_utf16_to_offset(right_point);
2173
2174                offset_utf16.0 += 1;
2175                if unit == b'\n' as u16 {
2176                    point_utf16.0 += PointUtf16::new(1, 0);
2177                } else {
2178                    point_utf16.0 += PointUtf16::new(0, 1);
2179                }
2180            }
2181
2182            for _ in 0..5 {
2183                let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
2184                let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
2185                assert_eq!(
2186                    actual.cursor(start_ix).summary::<TextSummary>(end_ix),
2187                    TextSummary::from(&expected[start_ix..end_ix])
2188                );
2189            }
2190
2191            let mut expected_longest_rows = Vec::new();
2192            let mut longest_line_len = -1_isize;
2193            for (row, line) in expected.split('\n').enumerate() {
2194                let row = row as u32;
2195                assert_eq!(
2196                    actual.line_len(row),
2197                    line.len() as u32,
2198                    "invalid line len for row {}",
2199                    row
2200                );
2201
2202                let line_char_count = line.chars().count() as isize;
2203                match line_char_count.cmp(&longest_line_len) {
2204                    Ordering::Less => {}
2205                    Ordering::Equal => expected_longest_rows.push(row),
2206                    Ordering::Greater => {
2207                        longest_line_len = line_char_count;
2208                        expected_longest_rows.clear();
2209                        expected_longest_rows.push(row);
2210                    }
2211                }
2212            }
2213
2214            let longest_row = actual.summary().longest_row;
2215            assert!(
2216                expected_longest_rows.contains(&longest_row),
2217                "incorrect longest row {}. expected {:?} with length {}",
2218                longest_row,
2219                expected_longest_rows,
2220                longest_line_len,
2221            );
2222        }
2223    }
2224
2225    #[test]
2226    fn test_chunks_equals_str() {
2227        let text = "This is a multi-chunk\n& multi-line test string!";
2228        let rope = Rope::from(text);
2229        for start in 0..text.len() {
2230            for end in start..text.len() {
2231                let range = start..end;
2232                let correct_substring = &text[start..end];
2233
2234                // Test that correct range returns true
2235                assert!(
2236                    rope.chunks_in_range(range.clone())
2237                        .equals_str(correct_substring)
2238                );
2239                assert!(
2240                    rope.reversed_chunks_in_range(range.clone())
2241                        .equals_str(correct_substring)
2242                );
2243
2244                // Test that all other ranges return false (unless they happen to match)
2245                for other_start in 0..text.len() {
2246                    for other_end in other_start..text.len() {
2247                        if other_start == start && other_end == end {
2248                            continue;
2249                        }
2250                        let other_substring = &text[other_start..other_end];
2251
2252                        // Only assert false if the substrings are actually different
2253                        if other_substring == correct_substring {
2254                            continue;
2255                        }
2256                        assert!(
2257                            !rope
2258                                .chunks_in_range(range.clone())
2259                                .equals_str(other_substring)
2260                        );
2261                        assert!(
2262                            !rope
2263                                .reversed_chunks_in_range(range.clone())
2264                                .equals_str(other_substring)
2265                        );
2266                    }
2267                }
2268            }
2269        }
2270
2271        let rope = Rope::from("");
2272        assert!(rope.chunks_in_range(0..0).equals_str(""));
2273        assert!(rope.reversed_chunks_in_range(0..0).equals_str(""));
2274        assert!(!rope.chunks_in_range(0..0).equals_str("foo"));
2275        assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo"));
2276    }
2277
2278    #[test]
2279    fn test_is_char_boundary() {
2280        let fixture = "";
2281        let rope = Rope::from("");
2282        for b in 0..=fixture.len() {
2283            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2284        }
2285        let fixture = "";
2286        let rope = Rope::from("");
2287        for b in 0..=fixture.len() {
2288            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2289        }
2290        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2291        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2292        for b in 0..=fixture.len() {
2293            assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2294        }
2295    }
2296
2297    #[test]
2298    fn test_floor_char_boundary() {
2299        // polyfill of str::floor_char_boundary
2300        fn floor_char_boundary(str: &str, index: usize) -> usize {
2301            if index >= str.len() {
2302                str.len()
2303            } else {
2304                let lower_bound = index.saturating_sub(3);
2305                let new_index = str.as_bytes()[lower_bound..=index]
2306                    .iter()
2307                    .rposition(|b| (*b as i8) >= -0x40);
2308
2309                lower_bound + new_index.unwrap()
2310            }
2311        }
2312
2313        let fixture = "";
2314        let rope = Rope::from("");
2315        for b in 0..=fixture.len() {
2316            assert_eq!(
2317                rope.floor_char_boundary(b),
2318                floor_char_boundary(&fixture, b)
2319            );
2320        }
2321
2322        let fixture = "";
2323        let rope = Rope::from("");
2324        for b in 0..=fixture.len() {
2325            assert_eq!(
2326                rope.floor_char_boundary(b),
2327                floor_char_boundary(&fixture, b)
2328            );
2329        }
2330
2331        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2332        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2333        for b in 0..=fixture.len() {
2334            assert_eq!(
2335                rope.floor_char_boundary(b),
2336                floor_char_boundary(&fixture, b)
2337            );
2338        }
2339    }
2340
2341    #[test]
2342    fn test_ceil_char_boundary() {
2343        // polyfill of str::ceil_char_boundary
2344        fn ceil_char_boundary(str: &str, index: usize) -> usize {
2345            if index > str.len() {
2346                str.len()
2347            } else {
2348                let upper_bound = Ord::min(index + 4, str.len());
2349                str.as_bytes()[index..upper_bound]
2350                    .iter()
2351                    .position(|b| (*b as i8) >= -0x40)
2352                    .map_or(upper_bound, |pos| pos + index)
2353            }
2354        }
2355
2356        let fixture = "";
2357        let rope = Rope::from("");
2358        for b in 0..=fixture.len() {
2359            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2360        }
2361
2362        let fixture = "";
2363        let rope = Rope::from("");
2364        for b in 0..=fixture.len() {
2365            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2366        }
2367
2368        let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩";
2369        let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️‍⚧️🏁🏳️‍🌈🏴‍☠️⛳️📬📭🏴🏳️🚩");
2370        for b in 0..=fixture.len() {
2371            assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2372        }
2373    }
2374
2375    #[test]
2376    fn test_to_string_with_line_ending() {
2377        // Test Unix line endings (no conversion)
2378        let rope = Rope::from("line1\nline2\nline3");
2379        assert_eq!(
2380            rope.to_string_with_line_ending(LineEnding::Unix),
2381            "line1\nline2\nline3"
2382        );
2383
2384        // Test Windows line endings
2385        assert_eq!(
2386            rope.to_string_with_line_ending(LineEnding::Windows),
2387            "line1\r\nline2\r\nline3"
2388        );
2389
2390        // Test empty rope
2391        let empty_rope = Rope::from("");
2392        assert_eq!(
2393            empty_rope.to_string_with_line_ending(LineEnding::Windows),
2394            ""
2395        );
2396
2397        // Test single line (no newlines)
2398        let single_line = Rope::from("single line");
2399        assert_eq!(
2400            single_line.to_string_with_line_ending(LineEnding::Windows),
2401            "single line"
2402        );
2403
2404        // Test rope ending with newline
2405        let ending_newline = Rope::from("line1\nline2\n");
2406        assert_eq!(
2407            ending_newline.to_string_with_line_ending(LineEnding::Windows),
2408            "line1\r\nline2\r\n"
2409        );
2410
2411        // Test large rope with multiple chunks
2412        let mut large_rope = Rope::new();
2413        for i in 0..100 {
2414            large_rope.push(&format!("line{}\n", i));
2415        }
2416        let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
2417        assert!(result.contains("\r\n"));
2418        assert!(!result.contains("\n\n"));
2419        assert_eq!(result.matches("\r\n").count(), 100);
2420    }
2421
2422    fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
2423        while !text.is_char_boundary(offset) {
2424            match bias {
2425                Bias::Left => offset -= 1,
2426                Bias::Right => offset += 1,
2427            }
2428        }
2429        offset
2430    }
2431
2432    impl Rope {
2433        fn text(&self) -> String {
2434            let mut text = String::new();
2435            for chunk in self.chunks.cursor::<()>(()) {
2436                text.push_str(&chunk.text);
2437            }
2438            text
2439        }
2440    }
2441}