chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
   9pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
  10
  11#[derive(Clone, Debug, Default)]
  12pub struct Chunk {
  13    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  14    /// text.
  15    chars: u128,
  16    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  17    /// text.
  18    ///
  19    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  20    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  21    /// than two UTF-16 code units.)
  22    chars_utf16: u128,
  23    /// If bit[i] is set, then the character at index i is an ascii newline.
  24    newlines: u128,
  25    /// If bit[i] is set, then the character at index i is an ascii tab.
  26    pub tabs: u128,
  27    pub text: ArrayString<MAX_BASE>,
  28}
  29
  30impl Chunk {
  31    #[inline(always)]
  32    pub fn new(text: &str) -> Self {
  33        let mut this = Chunk::default();
  34        this.push_str(text);
  35        this
  36    }
  37
  38    #[inline(always)]
  39    pub fn push_str(&mut self, text: &str) {
  40        for (char_ix, c) in text.char_indices() {
  41            let ix = self.text.len() + char_ix;
  42            self.chars |= 1 << ix;
  43            self.chars_utf16 |= 1 << ix;
  44            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
  45            self.newlines |= ((c == '\n') as u128) << ix;
  46            self.tabs |= ((c == '\t') as u128) << ix;
  47        }
  48        self.text.push_str(text);
  49    }
  50
  51    #[inline(always)]
  52    pub fn append(&mut self, slice: ChunkSlice) {
  53        if slice.is_empty() {
  54            return;
  55        };
  56
  57        let base_ix = self.text.len();
  58        self.chars |= slice.chars << base_ix;
  59        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  60        self.newlines |= slice.newlines << base_ix;
  61        self.tabs |= slice.tabs << base_ix;
  62        self.text.push_str(slice.text);
  63    }
  64
  65    #[inline(always)]
  66    pub fn as_slice(&self) -> ChunkSlice<'_> {
  67        ChunkSlice {
  68            chars: self.chars,
  69            chars_utf16: self.chars_utf16,
  70            newlines: self.newlines,
  71            tabs: self.tabs,
  72            text: &self.text,
  73        }
  74    }
  75
  76    #[inline(always)]
  77    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  78        self.as_slice().slice(range)
  79    }
  80
  81    #[inline(always)]
  82    pub const fn chars(&self) -> u128 {
  83        self.chars
  84    }
  85}
  86
  87#[derive(Clone, Copy, Debug)]
  88pub struct ChunkSlice<'a> {
  89    chars: u128,
  90    chars_utf16: u128,
  91    newlines: u128,
  92    tabs: u128,
  93    text: &'a str,
  94}
  95
  96impl Into<Chunk> for ChunkSlice<'_> {
  97    fn into(self) -> Chunk {
  98        Chunk {
  99            chars: self.chars,
 100            chars_utf16: self.chars_utf16,
 101            newlines: self.newlines,
 102            tabs: self.tabs,
 103            text: self.text.try_into().unwrap(),
 104        }
 105    }
 106}
 107
 108impl<'a> ChunkSlice<'a> {
 109    #[inline(always)]
 110    pub const fn is_empty(&self) -> bool {
 111        self.text.is_empty()
 112    }
 113
 114    #[inline(always)]
 115    pub const fn is_char_boundary(self, offset: usize) -> bool {
 116        self.text.is_char_boundary(offset)
 117    }
 118
 119    #[inline(always)]
 120    pub const fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 121        if mid == MAX_BASE {
 122            let left = self;
 123            let right = ChunkSlice {
 124                chars: 0,
 125                chars_utf16: 0,
 126                newlines: 0,
 127                tabs: 0,
 128                text: "",
 129            };
 130            (left, right)
 131        } else {
 132            let mask = (1u128 << mid) - 1;
 133            let (left_text, right_text) = self.text.split_at(mid);
 134            let left = ChunkSlice {
 135                chars: self.chars & mask,
 136                chars_utf16: self.chars_utf16 & mask,
 137                newlines: self.newlines & mask,
 138                tabs: self.tabs & mask,
 139                text: left_text,
 140            };
 141            let right = ChunkSlice {
 142                chars: self.chars >> mid,
 143                chars_utf16: self.chars_utf16 >> mid,
 144                newlines: self.newlines >> mid,
 145                tabs: self.tabs >> mid,
 146                text: right_text,
 147            };
 148            (left, right)
 149        }
 150    }
 151
 152    #[inline(always)]
 153    pub fn slice(self, range: Range<usize>) -> Self {
 154        let mask = if range.end == MAX_BASE {
 155            u128::MAX
 156        } else {
 157            debug_assert!(
 158                self.is_char_boundary(range.end),
 159                "Invalid range end {} in {:?}",
 160                range.end,
 161                self
 162            );
 163            (1u128 << range.end) - 1
 164        };
 165        if range.start == MAX_BASE {
 166            Self {
 167                chars: 0,
 168                chars_utf16: 0,
 169                newlines: 0,
 170                tabs: 0,
 171                text: "",
 172            }
 173        } else {
 174            debug_assert!(
 175                self.is_char_boundary(range.start),
 176                "Invalid range start {} in {:?}",
 177                range.start,
 178                self
 179            );
 180            Self {
 181                chars: (self.chars & mask) >> range.start,
 182                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 183                newlines: (self.newlines & mask) >> range.start,
 184                tabs: (self.tabs & mask) >> range.start,
 185                text: &self.text[range],
 186            }
 187        }
 188    }
 189
 190    #[inline(always)]
 191    pub fn text_summary(&self) -> TextSummary {
 192        let mut chars = 0;
 193        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 194        TextSummary {
 195            len: self.len(),
 196            chars,
 197            len_utf16: self.len_utf16(),
 198            lines: self.lines(),
 199            first_line_chars: self.first_line_chars(),
 200            last_line_chars: self.last_line_chars(),
 201            last_line_len_utf16: self.last_line_len_utf16(),
 202            longest_row,
 203            longest_row_chars,
 204        }
 205    }
 206
 207    /// Get length in bytes
 208    #[inline(always)]
 209    pub const fn len(&self) -> usize {
 210        self.text.len()
 211    }
 212
 213    /// Get length in UTF-16 code units
 214    #[inline(always)]
 215    pub const fn len_utf16(&self) -> OffsetUtf16 {
 216        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 217    }
 218
 219    /// Get point representing number of lines and length of last line
 220    #[inline(always)]
 221    pub const fn lines(&self) -> Point {
 222        let row = self.newlines.count_ones();
 223        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
 224        Point::new(row, column)
 225    }
 226
 227    /// Get number of chars in first line
 228    #[inline(always)]
 229    pub const fn first_line_chars(&self) -> u32 {
 230        if self.newlines == 0 {
 231            self.chars.count_ones()
 232        } else {
 233            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
 234            (self.chars & mask).count_ones()
 235        }
 236    }
 237
 238    /// Get number of chars in last line
 239    #[inline(always)]
 240    pub const fn last_line_chars(&self) -> u32 {
 241        if self.newlines == 0 {
 242            self.chars.count_ones()
 243        } else {
 244            let mask = !(u128::MAX >> self.newlines.leading_zeros());
 245            (self.chars & mask).count_ones()
 246        }
 247    }
 248
 249    /// Get number of UTF-16 code units in last line
 250    #[inline(always)]
 251    pub const fn last_line_len_utf16(&self) -> u32 {
 252        if self.newlines == 0 {
 253            self.chars_utf16.count_ones()
 254        } else {
 255            let mask = !(u128::MAX >> self.newlines.leading_zeros());
 256            (self.chars_utf16 & mask).count_ones()
 257        }
 258    }
 259
 260    /// Get the longest row in the chunk and its length in characters.
 261    /// Calculate the total number of characters in the chunk along the way.
 262    #[inline(always)]
 263    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 264        let mut chars = self.chars;
 265        let mut newlines = self.newlines;
 266        *total_chars = 0;
 267        let mut row = 0;
 268        let mut longest_row = 0;
 269        let mut longest_row_chars = 0;
 270        while newlines > 0 {
 271            let newline_ix = newlines.trailing_zeros();
 272            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 273            *total_chars += usize::from(row_chars);
 274            if row_chars > longest_row_chars {
 275                longest_row = row;
 276                longest_row_chars = row_chars;
 277            }
 278
 279            newlines >>= newline_ix;
 280            newlines >>= 1;
 281            chars >>= newline_ix;
 282            chars >>= 1;
 283            row += 1;
 284            *total_chars += 1;
 285        }
 286
 287        let row_chars = chars.count_ones() as u8;
 288        *total_chars += usize::from(row_chars);
 289        if row_chars > longest_row_chars {
 290            (row, row_chars as u32)
 291        } else {
 292            (longest_row, longest_row_chars as u32)
 293        }
 294    }
 295
 296    #[inline(always)]
 297    pub const fn offset_to_point(&self, offset: usize) -> Point {
 298        let mask = if offset == MAX_BASE {
 299            u128::MAX
 300        } else {
 301            (1u128 << offset) - 1
 302        };
 303        let row = (self.newlines & mask).count_ones();
 304        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
 305        let column = (offset - newline_ix as usize) as u32;
 306        Point::new(row, column)
 307    }
 308
 309    #[inline(always)]
 310    pub fn point_to_offset(&self, point: Point) -> usize {
 311        if point.row > self.lines().row {
 312            debug_panic!(
 313                "point {:?} extends beyond rows for string {:?}",
 314                point,
 315                self.text
 316            );
 317            return self.len();
 318        }
 319
 320        let row_offset_range = self.offset_range_for_row(point.row);
 321        if point.column > row_offset_range.len() as u32 {
 322            debug_panic!(
 323                "point {:?} extends beyond row for string {:?}",
 324                point,
 325                self.text
 326            );
 327            row_offset_range.end
 328        } else {
 329            row_offset_range.start + point.column as usize
 330        }
 331    }
 332
 333    #[inline(always)]
 334    pub const fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 335        let mask = if offset == MAX_BASE {
 336            u128::MAX
 337        } else {
 338            (1u128 << offset) - 1
 339        };
 340        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 341    }
 342
 343    #[inline(always)]
 344    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 345        if target.0 == 0 {
 346            0
 347        } else {
 348            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
 349            if ix == MAX_BASE {
 350                MAX_BASE
 351            } else {
 352                let utf8_additional_len = cmp::min(
 353                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 354                    self.text.len() - ix,
 355                );
 356                ix + utf8_additional_len
 357            }
 358        }
 359    }
 360
 361    #[inline(always)]
 362    pub const fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 363        let mask = if offset == MAX_BASE {
 364            u128::MAX
 365        } else {
 366            (1u128 << offset) - 1
 367        };
 368        let row = (self.newlines & mask).count_ones();
 369        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
 370        let column = if newline_ix as usize == MAX_BASE {
 371            0
 372        } else {
 373            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 374        };
 375        PointUtf16::new(row, column)
 376    }
 377
 378    #[inline(always)]
 379    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 380        self.offset_to_point_utf16(self.point_to_offset(point))
 381    }
 382
 383    #[inline(always)]
 384    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 385        let lines = self.lines();
 386        if point.row > lines.row {
 387            if !clip {
 388                debug_panic!(
 389                    "point {:?} is beyond this chunk's extent {:?}",
 390                    point,
 391                    self.text
 392                );
 393            }
 394            return self.len();
 395        }
 396
 397        let row_offset_range = self.offset_range_for_row(point.row);
 398        let line = self.slice(row_offset_range.clone());
 399        if point.column > line.last_line_len_utf16() {
 400            if !clip {
 401                debug_panic!(
 402                    "point {:?} is beyond the end of the line in chunk {:?}",
 403                    point,
 404                    self.text
 405                );
 406            }
 407            return line.len();
 408        }
 409
 410        let mut offset = row_offset_range.start;
 411        if point.column > 0 {
 412            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 413            if !self.text.is_char_boundary(offset) {
 414                offset -= 1;
 415                while !self.text.is_char_boundary(offset) {
 416                    offset -= 1;
 417                }
 418                if !clip {
 419                    debug_panic!(
 420                        "point {:?} is within character in chunk {:?}",
 421                        point,
 422                        self.text,
 423                    );
 424                }
 425            }
 426        }
 427        offset
 428    }
 429
 430    #[inline(always)]
 431    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 432        let max_point = self.lines();
 433        if point.0.row > max_point.row {
 434            return max_point;
 435        }
 436
 437        let row_offset_range = self.offset_range_for_row(point.0.row);
 438        let line = self.slice(row_offset_range);
 439        if point.0.column == 0 {
 440            Point::new(point.0.row, 0)
 441        } else if point.0.column >= line.len_utf16().0 as u32 {
 442            Point::new(point.0.row, line.len() as u32)
 443        } else {
 444            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 445            while !line.text.is_char_boundary(column) {
 446                column -= 1;
 447            }
 448            Point::new(point.0.row, column as u32)
 449        }
 450    }
 451
 452    #[inline(always)]
 453    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 454        let max_point = self.lines();
 455        if point.row > max_point.row {
 456            return max_point;
 457        }
 458
 459        let line = self.slice(self.offset_range_for_row(point.row));
 460        if point.column == 0 {
 461            point
 462        } else if point.column >= line.len() as u32 {
 463            Point::new(point.row, line.len() as u32)
 464        } else {
 465            let mut column = point.column as usize;
 466            let bytes = line.text.as_bytes();
 467            if bytes[column - 1] < 128 && bytes[column] < 128 {
 468                return Point::new(point.row, column as u32);
 469            }
 470
 471            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 472            loop {
 473                if line.is_char_boundary(column)
 474                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 475                {
 476                    break;
 477                }
 478
 479                match bias {
 480                    Bias::Left => column -= 1,
 481                    Bias::Right => column += 1,
 482                }
 483                grapheme_cursor.set_cursor(column);
 484            }
 485            Point::new(point.row, column as u32)
 486        }
 487    }
 488
 489    #[inline(always)]
 490    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 491        let max_point = self.lines();
 492        if point.0.row > max_point.row {
 493            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 494        } else {
 495            let line = self.slice(self.offset_range_for_row(point.0.row));
 496            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 497            PointUtf16::new(point.0.row, column.0 as u32)
 498        }
 499    }
 500
 501    #[inline(always)]
 502    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 503        if target == OffsetUtf16::default() {
 504            OffsetUtf16::default()
 505        } else if target >= self.len_utf16() {
 506            self.len_utf16()
 507        } else {
 508            let mut offset = self.offset_utf16_to_offset(target);
 509            while !self.text.is_char_boundary(offset) {
 510                if bias == Bias::Left {
 511                    offset -= 1;
 512                } else {
 513                    offset += 1;
 514                }
 515            }
 516            self.offset_to_offset_utf16(offset)
 517        }
 518    }
 519
 520    #[inline(always)]
 521    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 522        let row_start = if row > 0 {
 523            nth_set_bit(self.newlines, row as usize) + 1
 524        } else {
 525            0
 526        };
 527        let row_len = if row_start == MAX_BASE {
 528            0
 529        } else {
 530            cmp::min(
 531                (self.newlines >> row_start).trailing_zeros(),
 532                (self.text.len() - row_start) as u32,
 533            )
 534        };
 535        row_start..row_start + row_len as usize
 536    }
 537
 538    #[inline(always)]
 539    pub const fn tabs(&self) -> Tabs {
 540        Tabs {
 541            tabs: self.tabs,
 542            chars: self.chars,
 543        }
 544    }
 545}
 546
 547pub struct Tabs {
 548    tabs: u128,
 549    chars: u128,
 550}
 551
 552#[derive(Debug, PartialEq, Eq)]
 553pub struct TabPosition {
 554    pub byte_offset: usize,
 555    pub char_offset: usize,
 556}
 557
 558impl Iterator for Tabs {
 559    type Item = TabPosition;
 560
 561    fn next(&mut self) -> Option<Self::Item> {
 562        if self.tabs == 0 {
 563            return None;
 564        }
 565
 566        let tab_offset = self.tabs.trailing_zeros() as usize;
 567        let chars_mask = (1 << tab_offset) - 1;
 568        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 569
 570        // Since tabs are 1 byte the tab offset is the same as the byte offset
 571        let position = TabPosition {
 572            byte_offset: tab_offset,
 573            char_offset,
 574        };
 575        // Remove the tab we've just seen
 576        self.tabs ^= 1 << tab_offset;
 577
 578        Some(position)
 579    }
 580}
 581
 582/// Finds the n-th bit that is set to 1.
 583#[inline(always)]
 584const fn nth_set_bit(v: u128, n: usize) -> usize {
 585    let low = v as u64;
 586    let high = (v >> 64) as u64;
 587
 588    let low_count = low.count_ones() as usize;
 589    if n > low_count {
 590        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 591    } else {
 592        nth_set_bit_u64(low, n as u64) as usize
 593    }
 594}
 595
 596#[inline(always)]
 597const fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 598    let v = v.reverse_bits();
 599    let mut s: u64 = 64;
 600
 601    // Parallel bit count intermediates
 602    let a = v - ((v >> 1) & (u64::MAX / 3));
 603    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 604    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 605    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 606
 607    // Branchless select
 608    let t = (d >> 32) + (d >> 48);
 609    s -= (t.wrapping_sub(n) & 256) >> 3;
 610    n -= t & (t.wrapping_sub(n) >> 8);
 611
 612    let t = (d >> (s - 16)) & 0xff;
 613    s -= (t.wrapping_sub(n) & 256) >> 4;
 614    n -= t & (t.wrapping_sub(n) >> 8);
 615
 616    let t = (c >> (s - 8)) & 0xf;
 617    s -= (t.wrapping_sub(n) & 256) >> 5;
 618    n -= t & (t.wrapping_sub(n) >> 8);
 619
 620    let t = (b >> (s - 4)) & 0x7;
 621    s -= (t.wrapping_sub(n) & 256) >> 6;
 622    n -= t & (t.wrapping_sub(n) >> 8);
 623
 624    let t = (a >> (s - 2)) & 0x3;
 625    s -= (t.wrapping_sub(n) & 256) >> 7;
 626    n -= t & (t.wrapping_sub(n) >> 8);
 627
 628    let t = (v >> (s - 1)) & 0x1;
 629    s -= (t.wrapping_sub(n) & 256) >> 8;
 630
 631    65 - s - 1
 632}
 633
 634#[cfg(test)]
 635mod tests {
 636    use super::*;
 637    use rand::prelude::*;
 638    use util::RandomCharIter;
 639
 640    #[gpui::test(iterations = 100)]
 641    fn test_random_chunks(mut rng: StdRng) {
 642        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 643        log::info!("Chunk: {:?}", text);
 644        let chunk = Chunk::new(&text);
 645        verify_chunk(chunk.as_slice(), &text);
 646
 647        // Verify Chunk::chars() bitmap
 648        let expected_chars = char_offsets(&text)
 649            .into_iter()
 650            .inspect(|i| assert!(*i < 128))
 651            .fold(0u128, |acc, i| acc | (1 << i));
 652        assert_eq!(chunk.chars(), expected_chars);
 653
 654        for _ in 0..10 {
 655            let mut start = rng.random_range(0..=chunk.text.len());
 656            let mut end = rng.random_range(start..=chunk.text.len());
 657            while !chunk.text.is_char_boundary(start) {
 658                start -= 1;
 659            }
 660            while !chunk.text.is_char_boundary(end) {
 661                end -= 1;
 662            }
 663            let range = start..end;
 664            log::info!("Range: {:?}", range);
 665            let text_slice = &text[range.clone()];
 666            let chunk_slice = chunk.slice(range);
 667            verify_chunk(chunk_slice, text_slice);
 668        }
 669    }
 670
 671    #[gpui::test(iterations = 100)]
 672    fn test_split_chunk_slice(mut rng: StdRng) {
 673        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 674        let chunk = Chunk::new(text);
 675        let offset = char_offsets_with_end(text)
 676            .into_iter()
 677            .choose(&mut rng)
 678            .unwrap();
 679        let (a, b) = chunk.as_slice().split_at(offset);
 680        let (a_str, b_str) = text.split_at(offset);
 681        verify_chunk(a, a_str);
 682        verify_chunk(b, b_str);
 683    }
 684
 685    #[gpui::test(iterations = 1000)]
 686    fn test_nth_set_bit_random(mut rng: StdRng) {
 687        let set_count = rng.random_range(0..=128);
 688        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 689        set_bits.sort();
 690        let mut n = 0;
 691        for ix in set_bits.iter().copied() {
 692            n |= 1 << ix;
 693        }
 694
 695        for (mut ix, position) in set_bits.into_iter().enumerate() {
 696            ix += 1;
 697            assert_eq!(
 698                nth_set_bit(n, ix),
 699                position,
 700                "nth_set_bit({:0128b}, {})",
 701                n,
 702                ix
 703            );
 704        }
 705    }
 706
 707    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 708    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 709        let mut str = String::new();
 710        let mut chars = RandomCharIter::new(rng);
 711        loop {
 712            let ch = chars.next().unwrap();
 713            if str.len() + ch.len_utf8() > len {
 714                break;
 715            }
 716            str.push(ch);
 717        }
 718        str
 719    }
 720
 721    #[gpui::test(iterations = 1000)]
 722    fn test_append_random_strings(mut rng: StdRng) {
 723        let len1 = rng.random_range(0..=MAX_BASE);
 724        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 725        let str1 = random_string_with_utf8_len(&mut rng, len1);
 726        let str2 = random_string_with_utf8_len(&mut rng, len2);
 727        let mut chunk1 = Chunk::new(&str1);
 728        let chunk2 = Chunk::new(&str2);
 729        let char_offsets = char_offsets_with_end(&str2);
 730        let start_index = rng.random_range(0..char_offsets.len());
 731        let start_offset = char_offsets[start_index];
 732        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 733        chunk1.append(chunk2.slice(start_offset..end_offset));
 734        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 735    }
 736
 737    /// Return the byte offsets for each character in a string.
 738    ///
 739    /// These are valid offsets to split the string.
 740    fn char_offsets(text: &str) -> Vec<usize> {
 741        text.char_indices().map(|(i, _c)| i).collect()
 742    }
 743
 744    /// Return the byte offsets for each character in a string, plus the offset
 745    /// past the end of the string.
 746    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 747        let mut v = char_offsets(text);
 748        v.push(text.len());
 749        v
 750    }
 751
 752    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 753        let mut offset = 0;
 754        let mut offset_utf16 = OffsetUtf16(0);
 755        let mut point = Point::zero();
 756        let mut point_utf16 = PointUtf16::zero();
 757
 758        log::info!("Verifying chunk {:?}", text);
 759        assert_eq!(chunk.offset_to_point(0), Point::zero());
 760
 761        let mut expected_tab_positions = Vec::new();
 762
 763        for (char_offset, c) in text.chars().enumerate() {
 764            let expected_point = chunk.offset_to_point(offset);
 765            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 766            assert_eq!(
 767                chunk.point_to_offset(point),
 768                offset,
 769                "mismatch at point {:?}",
 770                point
 771            );
 772            assert_eq!(
 773                chunk.offset_to_offset_utf16(offset),
 774                offset_utf16,
 775                "mismatch at offset {}",
 776                offset
 777            );
 778            assert_eq!(
 779                chunk.offset_utf16_to_offset(offset_utf16),
 780                offset,
 781                "mismatch at offset_utf16 {:?}",
 782                offset_utf16
 783            );
 784            assert_eq!(
 785                chunk.point_to_point_utf16(point),
 786                point_utf16,
 787                "mismatch at point {:?}",
 788                point
 789            );
 790            assert_eq!(
 791                chunk.point_utf16_to_offset(point_utf16, false),
 792                offset,
 793                "mismatch at point_utf16 {:?}",
 794                point_utf16
 795            );
 796            assert_eq!(
 797                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 798                point,
 799                "mismatch for unclipped_point_utf16_to_point at {:?}",
 800                point_utf16
 801            );
 802
 803            assert_eq!(
 804                chunk.clip_point(point, Bias::Left),
 805                point,
 806                "incorrect left clip at {:?}",
 807                point
 808            );
 809            assert_eq!(
 810                chunk.clip_point(point, Bias::Right),
 811                point,
 812                "incorrect right clip at {:?}",
 813                point
 814            );
 815
 816            for i in 1..c.len_utf8() {
 817                let test_point = Point::new(point.row, point.column + i as u32);
 818                assert_eq!(
 819                    chunk.clip_point(test_point, Bias::Left),
 820                    point,
 821                    "incorrect left clip within multi-byte char at {:?}",
 822                    test_point
 823                );
 824                assert_eq!(
 825                    chunk.clip_point(test_point, Bias::Right),
 826                    Point::new(point.row, point.column + c.len_utf8() as u32),
 827                    "incorrect right clip within multi-byte char at {:?}",
 828                    test_point
 829                );
 830            }
 831
 832            for i in 1..c.len_utf16() {
 833                let test_point = Unclipped(PointUtf16::new(
 834                    point_utf16.row,
 835                    point_utf16.column + i as u32,
 836                ));
 837                assert_eq!(
 838                    chunk.unclipped_point_utf16_to_point(test_point),
 839                    point,
 840                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 841                    test_point
 842                );
 843                assert_eq!(
 844                    chunk.clip_point_utf16(test_point, Bias::Left),
 845                    point_utf16,
 846                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 847                    test_point
 848                );
 849                assert_eq!(
 850                    chunk.clip_point_utf16(test_point, Bias::Right),
 851                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 852                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 853                    test_point
 854                );
 855
 856                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 857                assert_eq!(
 858                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 859                    offset_utf16,
 860                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 861                    test_offset
 862                );
 863                assert_eq!(
 864                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 865                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 866                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 867                    test_offset
 868                );
 869            }
 870
 871            if c == '\n' {
 872                point.row += 1;
 873                point.column = 0;
 874                point_utf16.row += 1;
 875                point_utf16.column = 0;
 876            } else {
 877                point.column += c.len_utf8() as u32;
 878                point_utf16.column += c.len_utf16() as u32;
 879            }
 880
 881            if c == '\t' {
 882                expected_tab_positions.push(TabPosition {
 883                    byte_offset: offset,
 884                    char_offset,
 885                });
 886            }
 887
 888            offset += c.len_utf8();
 889            offset_utf16.0 += c.len_utf16();
 890        }
 891
 892        let final_point = chunk.offset_to_point(offset);
 893        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
 894        assert_eq!(
 895            chunk.point_to_offset(point),
 896            offset,
 897            "mismatch at point {:?}",
 898            point
 899        );
 900        assert_eq!(
 901            chunk.offset_to_offset_utf16(offset),
 902            offset_utf16,
 903            "mismatch at offset {}",
 904            offset
 905        );
 906        assert_eq!(
 907            chunk.offset_utf16_to_offset(offset_utf16),
 908            offset,
 909            "mismatch at offset_utf16 {:?}",
 910            offset_utf16
 911        );
 912        assert_eq!(
 913            chunk.point_to_point_utf16(point),
 914            point_utf16,
 915            "mismatch at final point {:?}",
 916            point
 917        );
 918        assert_eq!(
 919            chunk.point_utf16_to_offset(point_utf16, false),
 920            offset,
 921            "mismatch at final point_utf16 {:?}",
 922            point_utf16
 923        );
 924        assert_eq!(
 925            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 926            point,
 927            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
 928            point_utf16
 929        );
 930        assert_eq!(
 931            chunk.clip_point(point, Bias::Left),
 932            point,
 933            "incorrect left clip at final point {:?}",
 934            point
 935        );
 936        assert_eq!(
 937            chunk.clip_point(point, Bias::Right),
 938            point,
 939            "incorrect right clip at final point {:?}",
 940            point
 941        );
 942        assert_eq!(
 943            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
 944            point_utf16,
 945            "incorrect left clip_point_utf16 at final point {:?}",
 946            point_utf16
 947        );
 948        assert_eq!(
 949            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
 950            point_utf16,
 951            "incorrect right clip_point_utf16 at final point {:?}",
 952            point_utf16
 953        );
 954        assert_eq!(
 955            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
 956            offset_utf16,
 957            "incorrect left clip_offset_utf16 at final offset {:?}",
 958            offset_utf16
 959        );
 960        assert_eq!(
 961            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
 962            offset_utf16,
 963            "incorrect right clip_offset_utf16 at final offset {:?}",
 964            offset_utf16
 965        );
 966
 967        // Verify length methods
 968        assert_eq!(chunk.len(), text.len());
 969        assert_eq!(
 970            chunk.len_utf16().0,
 971            text.chars().map(|c| c.len_utf16()).sum::<usize>()
 972        );
 973
 974        // Verify line counting
 975        let lines = chunk.lines();
 976        let mut newline_count = 0;
 977        let mut last_line_len = 0;
 978        for c in text.chars() {
 979            if c == '\n' {
 980                newline_count += 1;
 981                last_line_len = 0;
 982            } else {
 983                last_line_len += c.len_utf8() as u32;
 984            }
 985        }
 986        assert_eq!(lines, Point::new(newline_count, last_line_len));
 987
 988        // Verify first/last line chars
 989        if !text.is_empty() {
 990            let first_line = text.split('\n').next().unwrap();
 991            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
 992
 993            let last_line = text.split('\n').next_back().unwrap();
 994            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
 995            assert_eq!(
 996                chunk.last_line_len_utf16(),
 997                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
 998            );
 999        }
1000
1001        // Verify longest row
1002        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1003        let mut max_chars = 0;
1004        let mut current_row = 0;
1005        let mut current_chars = 0;
1006        let mut max_row = 0;
1007
1008        for c in text.chars() {
1009            if c == '\n' {
1010                if current_chars > max_chars {
1011                    max_chars = current_chars;
1012                    max_row = current_row;
1013                }
1014                current_row += 1;
1015                current_chars = 0;
1016            } else {
1017                current_chars += 1;
1018            }
1019        }
1020
1021        if current_chars > max_chars {
1022            max_chars = current_chars;
1023            max_row = current_row;
1024        }
1025
1026        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1027        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1028    }
1029}