chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35impl Chunk {
  36    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  37
  38    #[inline(always)]
  39    pub fn new(text: &str) -> Self {
  40        let mut this = Chunk::default();
  41        this.push_str(text);
  42        this
  43    }
  44
  45    #[inline(always)]
  46    pub fn push_str(&mut self, text: &str) {
  47        for (char_ix, c) in text.char_indices() {
  48            let ix = self.text.len() + char_ix;
  49            self.chars |= 1 << ix;
  50            self.chars_utf16 |= 1 << ix;
  51            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  52            self.newlines |= ((c == '\n') as Bitmap) << ix;
  53            self.tabs |= ((c == '\t') as Bitmap) << ix;
  54        }
  55        self.text.push_str(text);
  56    }
  57
  58    #[inline(always)]
  59    pub fn append(&mut self, slice: ChunkSlice) {
  60        if slice.is_empty() {
  61            return;
  62        };
  63
  64        let base_ix = self.text.len();
  65        self.chars |= slice.chars << base_ix;
  66        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  67        self.newlines |= slice.newlines << base_ix;
  68        self.tabs |= slice.tabs << base_ix;
  69        self.text.push_str(slice.text);
  70    }
  71
  72    #[inline(always)]
  73    pub fn as_slice(&self) -> ChunkSlice<'_> {
  74        ChunkSlice {
  75            chars: self.chars,
  76            chars_utf16: self.chars_utf16,
  77            newlines: self.newlines,
  78            tabs: self.tabs,
  79            text: &self.text,
  80        }
  81    }
  82
  83    #[inline(always)]
  84    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  85        self.as_slice().slice(range)
  86    }
  87
  88    #[inline(always)]
  89    pub fn chars(&self) -> Bitmap {
  90        self.chars
  91    }
  92
  93    pub fn tabs(&self) -> Bitmap {
  94        self.tabs
  95    }
  96
  97    #[inline(always)]
  98    pub fn is_char_boundary(&self, offset: usize) -> bool {
  99        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 100    }
 101
 102    pub fn floor_char_boundary(&self, index: usize) -> usize {
 103        #[inline]
 104        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 105            // This is bit magic equivalent to: b < 128 || b >= 192
 106            (u8 as i8) >= -0x40
 107        }
 108
 109        if index >= self.text.len() {
 110            self.text.len()
 111        } else {
 112            let mut i = index;
 113            while i > 0 {
 114                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 115                    break;
 116                }
 117                i -= 1;
 118            }
 119
 120            i
 121        }
 122    }
 123
 124    #[track_caller]
 125    #[inline(always)]
 126    pub fn assert_char_boundary(&self, offset: usize) {
 127        if self.is_char_boundary(offset) {
 128            return;
 129        }
 130        panic_char_boundary(self, offset);
 131
 132        #[cold]
 133        #[inline(never)]
 134        #[track_caller]
 135        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
 136            if offset > chunk.text.len() {
 137                panic!(
 138                    "byte index {} is out of bounds of `{:?}` (length: {})",
 139                    offset,
 140                    chunk.text,
 141                    chunk.text.len()
 142                );
 143            }
 144            // find the character
 145            let char_start = chunk.floor_char_boundary(offset);
 146            // `char_start` must be less than len and a char boundary
 147            let ch = chunk
 148                .text
 149                .get(char_start..)
 150                .unwrap()
 151                .chars()
 152                .next()
 153                .unwrap();
 154            let char_range = char_start..char_start + ch.len_utf8();
 155            panic!(
 156                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 157                offset, ch, char_range,
 158            );
 159        }
 160    }
 161}
 162
 163#[derive(Clone, Copy, Debug)]
 164pub struct ChunkSlice<'a> {
 165    chars: Bitmap,
 166    chars_utf16: Bitmap,
 167    newlines: Bitmap,
 168    tabs: Bitmap,
 169    text: &'a str,
 170}
 171
 172impl Into<Chunk> for ChunkSlice<'_> {
 173    fn into(self) -> Chunk {
 174        Chunk {
 175            chars: self.chars,
 176            chars_utf16: self.chars_utf16,
 177            newlines: self.newlines,
 178            tabs: self.tabs,
 179            text: self.text.try_into().unwrap(),
 180        }
 181    }
 182}
 183
 184impl<'a> ChunkSlice<'a> {
 185    #[inline(always)]
 186    pub fn is_empty(&self) -> bool {
 187        self.text.is_empty()
 188    }
 189
 190    #[inline(always)]
 191    pub fn is_char_boundary(self, offset: usize) -> bool {
 192        self.text.is_char_boundary(offset)
 193    }
 194
 195    #[inline(always)]
 196    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 197        if mid == MAX_BASE {
 198            let left = self;
 199            let right = ChunkSlice {
 200                chars: 0,
 201                chars_utf16: 0,
 202                newlines: 0,
 203                tabs: 0,
 204                text: "",
 205            };
 206            (left, right)
 207        } else {
 208            let mask = ((1 as Bitmap) << mid) - 1;
 209            let (left_text, right_text) = self.text.split_at(mid);
 210            let left = ChunkSlice {
 211                chars: self.chars & mask,
 212                chars_utf16: self.chars_utf16 & mask,
 213                newlines: self.newlines & mask,
 214                tabs: self.tabs & mask,
 215                text: left_text,
 216            };
 217            let right = ChunkSlice {
 218                chars: self.chars >> mid,
 219                chars_utf16: self.chars_utf16 >> mid,
 220                newlines: self.newlines >> mid,
 221                tabs: self.tabs >> mid,
 222                text: right_text,
 223            };
 224            (left, right)
 225        }
 226    }
 227
 228    #[inline(always)]
 229    pub fn slice(self, range: Range<usize>) -> Self {
 230        debug_assert!(
 231            self.is_char_boundary(range.end),
 232            "Invalid range end {} in {:?}",
 233            range.end,
 234            self
 235        );
 236        let mask = (1 as Bitmap)
 237            .unbounded_shl(range.end as u32)
 238            .wrapping_sub(1);
 239        if range.start == MAX_BASE {
 240            Self {
 241                chars: 0,
 242                chars_utf16: 0,
 243                newlines: 0,
 244                tabs: 0,
 245                text: "",
 246            }
 247        } else {
 248            debug_assert!(
 249                self.is_char_boundary(range.start),
 250                "Invalid range start {} in {:?}",
 251                range.start,
 252                self
 253            );
 254            Self {
 255                chars: (self.chars & mask) >> range.start,
 256                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 257                newlines: (self.newlines & mask) >> range.start,
 258                tabs: (self.tabs & mask) >> range.start,
 259                text: &self.text[range],
 260            }
 261        }
 262    }
 263
 264    #[inline(always)]
 265    pub fn text_summary(&self) -> TextSummary {
 266        let mut chars = 0;
 267        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 268        TextSummary {
 269            len: self.len(),
 270            chars,
 271            len_utf16: self.len_utf16(),
 272            lines: self.lines(),
 273            first_line_chars: self.first_line_chars(),
 274            last_line_chars: self.last_line_chars(),
 275            last_line_len_utf16: self.last_line_len_utf16(),
 276            longest_row,
 277            longest_row_chars,
 278        }
 279    }
 280
 281    /// Get length in bytes
 282    #[inline(always)]
 283    pub fn len(&self) -> usize {
 284        self.text.len()
 285    }
 286
 287    /// Get length in UTF-16 code units
 288    #[inline(always)]
 289    pub fn len_utf16(&self) -> OffsetUtf16 {
 290        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 291    }
 292
 293    /// Get point representing number of lines and length of last line
 294    #[inline(always)]
 295    pub fn lines(&self) -> Point {
 296        let row = self.newlines.count_ones();
 297        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 298        Point::new(row, column)
 299    }
 300
 301    /// Get number of chars in first line
 302    #[inline(always)]
 303    pub fn first_line_chars(&self) -> u32 {
 304        if self.newlines == 0 {
 305            self.chars.count_ones()
 306        } else {
 307            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
 308            (self.chars & mask).count_ones()
 309        }
 310    }
 311
 312    /// Get number of chars in last line
 313    #[inline(always)]
 314    pub fn last_line_chars(&self) -> u32 {
 315        if self.newlines == 0 {
 316            self.chars.count_ones()
 317        } else {
 318            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 319            (self.chars & mask).count_ones()
 320        }
 321    }
 322
 323    /// Get number of UTF-16 code units in last line
 324    #[inline(always)]
 325    pub fn last_line_len_utf16(&self) -> u32 {
 326        if self.newlines == 0 {
 327            self.chars_utf16.count_ones()
 328        } else {
 329            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 330            (self.chars_utf16 & mask).count_ones()
 331        }
 332    }
 333
 334    /// Get the longest row in the chunk and its length in characters.
 335    /// Calculate the total number of characters in the chunk along the way.
 336    #[inline(always)]
 337    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 338        let mut chars = self.chars;
 339        let mut newlines = self.newlines;
 340        *total_chars = 0;
 341        let mut row = 0;
 342        let mut longest_row = 0;
 343        let mut longest_row_chars = 0;
 344        while newlines > 0 {
 345            let newline_ix = newlines.trailing_zeros();
 346            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 347            *total_chars += usize::from(row_chars);
 348            if row_chars > longest_row_chars {
 349                longest_row = row;
 350                longest_row_chars = row_chars;
 351            }
 352
 353            newlines >>= newline_ix;
 354            newlines >>= 1;
 355            chars >>= newline_ix;
 356            chars >>= 1;
 357            row += 1;
 358            *total_chars += 1;
 359        }
 360
 361        let row_chars = chars.count_ones() as u8;
 362        *total_chars += usize::from(row_chars);
 363        if row_chars > longest_row_chars {
 364            (row, row_chars as u32)
 365        } else {
 366            (longest_row, longest_row_chars as u32)
 367        }
 368    }
 369
 370    #[inline(always)]
 371    pub fn offset_to_point(&self, offset: usize) -> Point {
 372        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 373        let row = (self.newlines & mask).count_ones();
 374        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 375        let column = (offset - newline_ix as usize) as u32;
 376        Point::new(row, column)
 377    }
 378
 379    #[inline(always)]
 380    pub fn point_to_offset(&self, point: Point) -> usize {
 381        if point.row > self.lines().row {
 382            debug_panic!(
 383                "point {:?} extends beyond rows for string {:?}",
 384                point,
 385                self.text
 386            );
 387            return self.len();
 388        }
 389
 390        let row_offset_range = self.offset_range_for_row(point.row);
 391        if point.column > row_offset_range.len() as u32 {
 392            debug_panic!(
 393                "point {:?} extends beyond row for string {:?}",
 394                point,
 395                self.text
 396            );
 397            row_offset_range.end
 398        } else {
 399            row_offset_range.start + point.column as usize
 400        }
 401    }
 402
 403    #[inline(always)]
 404    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 405        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 406        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 407    }
 408
 409    #[inline(always)]
 410    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 411        if target.0 == 0 {
 412            0
 413        } else {
 414            #[cfg(not(test))]
 415            let chars_utf16 = self.chars_utf16;
 416            #[cfg(test)]
 417            let chars_utf16 = self.chars_utf16 as u128;
 418            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 419            if ix == MAX_BASE {
 420                MAX_BASE
 421            } else {
 422                let utf8_additional_len = cmp::min(
 423                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 424                    self.text.len() - ix,
 425                );
 426                ix + utf8_additional_len
 427            }
 428        }
 429    }
 430
 431    #[inline(always)]
 432    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 433        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 434        let row = (self.newlines & mask).count_ones();
 435        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 436        let column = if newline_ix as usize == MAX_BASE {
 437            0
 438        } else {
 439            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 440        };
 441        PointUtf16::new(row, column)
 442    }
 443
 444    #[inline(always)]
 445    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 446        self.offset_to_point_utf16(self.point_to_offset(point))
 447    }
 448
 449    #[inline(always)]
 450    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 451        let lines = self.lines();
 452        if point.row > lines.row {
 453            if !clip {
 454                debug_panic!(
 455                    "point {:?} is beyond this chunk's extent {:?}",
 456                    point,
 457                    self.text
 458                );
 459            }
 460            return self.len();
 461        }
 462
 463        let row_offset_range = self.offset_range_for_row(point.row);
 464        let line = self.slice(row_offset_range.clone());
 465        if point.column > line.last_line_len_utf16() {
 466            if !clip {
 467                debug_panic!(
 468                    "point {:?} is beyond the end of the line in chunk {:?}",
 469                    point,
 470                    self.text
 471                );
 472            }
 473            return line.len();
 474        }
 475
 476        let mut offset = row_offset_range.start;
 477        if point.column > 0 {
 478            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 479            if !self.text.is_char_boundary(offset) {
 480                offset -= 1;
 481                while !self.text.is_char_boundary(offset) {
 482                    offset -= 1;
 483                }
 484                if !clip {
 485                    debug_panic!(
 486                        "point {:?} is within character in chunk {:?}",
 487                        point,
 488                        self.text,
 489                    );
 490                }
 491            }
 492        }
 493        offset
 494    }
 495
 496    #[inline(always)]
 497    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 498        let max_point = self.lines();
 499        if point.0.row > max_point.row {
 500            return max_point;
 501        }
 502
 503        let row_offset_range = self.offset_range_for_row(point.0.row);
 504        let line = self.slice(row_offset_range);
 505        if point.0.column == 0 {
 506            Point::new(point.0.row, 0)
 507        } else if point.0.column >= line.len_utf16().0 as u32 {
 508            Point::new(point.0.row, line.len() as u32)
 509        } else {
 510            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 511            while !line.text.is_char_boundary(column) {
 512                column -= 1;
 513            }
 514            Point::new(point.0.row, column as u32)
 515        }
 516    }
 517
 518    #[inline(always)]
 519    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 520        let max_point = self.lines();
 521        if point.row > max_point.row {
 522            return max_point;
 523        }
 524
 525        let line = self.slice(self.offset_range_for_row(point.row));
 526        if point.column == 0 {
 527            point
 528        } else if point.column >= line.len() as u32 {
 529            Point::new(point.row, line.len() as u32)
 530        } else {
 531            let mut column = point.column as usize;
 532            let bytes = line.text.as_bytes();
 533            if bytes[column - 1] < 128 && bytes[column] < 128 {
 534                return Point::new(point.row, column as u32);
 535            }
 536
 537            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 538            loop {
 539                if line.is_char_boundary(column)
 540                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 541                {
 542                    break;
 543                }
 544
 545                match bias {
 546                    Bias::Left => column -= 1,
 547                    Bias::Right => column += 1,
 548                }
 549                grapheme_cursor.set_cursor(column);
 550            }
 551            Point::new(point.row, column as u32)
 552        }
 553    }
 554
 555    #[inline(always)]
 556    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 557        let max_point = self.lines();
 558        if point.0.row > max_point.row {
 559            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 560        } else {
 561            let line = self.slice(self.offset_range_for_row(point.0.row));
 562            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 563            PointUtf16::new(point.0.row, column.0 as u32)
 564        }
 565    }
 566
 567    #[inline(always)]
 568    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 569        if target == OffsetUtf16::default() {
 570            OffsetUtf16::default()
 571        } else if target >= self.len_utf16() {
 572            self.len_utf16()
 573        } else {
 574            let mut offset = self.offset_utf16_to_offset(target);
 575            while !self.text.is_char_boundary(offset) {
 576                if bias == Bias::Left {
 577                    offset -= 1;
 578                } else {
 579                    offset += 1;
 580                }
 581            }
 582            self.offset_to_offset_utf16(offset)
 583        }
 584    }
 585
 586    #[inline(always)]
 587    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 588        let row_start = if row > 0 {
 589            #[cfg(not(test))]
 590            let newlines = self.newlines;
 591            #[cfg(test)]
 592            let newlines = self.newlines as u128;
 593            nth_set_bit(newlines, row as usize) + 1
 594        } else {
 595            0
 596        };
 597        let row_len = if row_start == MAX_BASE {
 598            0
 599        } else {
 600            cmp::min(
 601                (self.newlines >> row_start).trailing_zeros(),
 602                (self.text.len() - row_start) as u32,
 603            )
 604        };
 605        row_start..row_start + row_len as usize
 606    }
 607
 608    #[inline(always)]
 609    pub fn tabs(&self) -> Tabs {
 610        Tabs {
 611            tabs: self.tabs,
 612            chars: self.chars,
 613        }
 614    }
 615}
 616
 617pub struct Tabs {
 618    tabs: Bitmap,
 619    chars: Bitmap,
 620}
 621
 622#[derive(Debug, PartialEq, Eq)]
 623pub struct TabPosition {
 624    pub byte_offset: usize,
 625    pub char_offset: usize,
 626}
 627
 628impl Iterator for Tabs {
 629    type Item = TabPosition;
 630
 631    fn next(&mut self) -> Option<Self::Item> {
 632        if self.tabs == 0 {
 633            return None;
 634        }
 635
 636        let tab_offset = self.tabs.trailing_zeros() as usize;
 637        let chars_mask = (1 << tab_offset) - 1;
 638        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 639
 640        // Since tabs are 1 byte the tab offset is the same as the byte offset
 641        let position = TabPosition {
 642            byte_offset: tab_offset,
 643            char_offset,
 644        };
 645        // Remove the tab we've just seen
 646        self.tabs ^= 1 << tab_offset;
 647
 648        Some(position)
 649    }
 650}
 651
 652/// Finds the n-th bit that is set to 1.
 653#[inline(always)]
 654fn nth_set_bit(v: u128, n: usize) -> usize {
 655    let low = v as u64;
 656    let high = (v >> 64) as u64;
 657
 658    let low_count = low.count_ones() as usize;
 659    if n > low_count {
 660        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 661    } else {
 662        nth_set_bit_u64(low, n as u64) as usize
 663    }
 664}
 665
 666#[inline(always)]
 667fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 668    let v = v.reverse_bits();
 669    let mut s: u64 = 64;
 670
 671    // Parallel bit count intermediates
 672    let a = v - ((v >> 1) & (u64::MAX / 3));
 673    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 674    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 675    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 676
 677    // Branchless select
 678    let t = (d >> 32) + (d >> 48);
 679    s -= (t.wrapping_sub(n) & 256) >> 3;
 680    n -= t & (t.wrapping_sub(n) >> 8);
 681
 682    let t = (d >> (s - 16)) & 0xff;
 683    s -= (t.wrapping_sub(n) & 256) >> 4;
 684    n -= t & (t.wrapping_sub(n) >> 8);
 685
 686    let t = (c >> (s - 8)) & 0xf;
 687    s -= (t.wrapping_sub(n) & 256) >> 5;
 688    n -= t & (t.wrapping_sub(n) >> 8);
 689
 690    let t = (b >> (s - 4)) & 0x7;
 691    s -= (t.wrapping_sub(n) & 256) >> 6;
 692    n -= t & (t.wrapping_sub(n) >> 8);
 693
 694    let t = (a >> (s - 2)) & 0x3;
 695    s -= (t.wrapping_sub(n) & 256) >> 7;
 696    n -= t & (t.wrapping_sub(n) >> 8);
 697
 698    let t = (v >> (s - 1)) & 0x1;
 699    s -= (t.wrapping_sub(n) & 256) >> 8;
 700
 701    65 - s - 1
 702}
 703
 704#[cfg(test)]
 705mod tests {
 706    use super::*;
 707    use rand::prelude::*;
 708    use util::RandomCharIter;
 709
 710    #[gpui::test(iterations = 100)]
 711    fn test_random_chunks(mut rng: StdRng) {
 712        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 713        log::info!("Chunk: {:?}", text);
 714        let chunk = Chunk::new(&text);
 715        verify_chunk(chunk.as_slice(), &text);
 716
 717        // Verify Chunk::chars() bitmap
 718        let expected_chars = char_offsets(&text)
 719            .into_iter()
 720            .inspect(|i| assert!(*i < MAX_BASE))
 721            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 722        assert_eq!(chunk.chars(), expected_chars);
 723
 724        for _ in 0..10 {
 725            let mut start = rng.random_range(0..=chunk.text.len());
 726            let mut end = rng.random_range(start..=chunk.text.len());
 727            while !chunk.text.is_char_boundary(start) {
 728                start -= 1;
 729            }
 730            while !chunk.text.is_char_boundary(end) {
 731                end -= 1;
 732            }
 733            let range = start..end;
 734            log::info!("Range: {:?}", range);
 735            let text_slice = &text[range.clone()];
 736            let chunk_slice = chunk.slice(range);
 737            verify_chunk(chunk_slice, text_slice);
 738        }
 739    }
 740
 741    #[gpui::test(iterations = 100)]
 742    fn test_split_chunk_slice(mut rng: StdRng) {
 743        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 744        let chunk = Chunk::new(text);
 745        let offset = char_offsets_with_end(text)
 746            .into_iter()
 747            .choose(&mut rng)
 748            .unwrap();
 749        let (a, b) = chunk.as_slice().split_at(offset);
 750        let (a_str, b_str) = text.split_at(offset);
 751        verify_chunk(a, a_str);
 752        verify_chunk(b, b_str);
 753    }
 754
 755    #[gpui::test(iterations = 1000)]
 756    fn test_nth_set_bit_random(mut rng: StdRng) {
 757        let set_count = rng.random_range(0..=128);
 758        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 759        set_bits.sort();
 760        let mut n = 0;
 761        for ix in set_bits.iter().copied() {
 762            n |= 1 << ix;
 763        }
 764
 765        for (mut ix, position) in set_bits.into_iter().enumerate() {
 766            ix += 1;
 767            assert_eq!(
 768                nth_set_bit(n, ix),
 769                position,
 770                "nth_set_bit({:0128b}, {})",
 771                n,
 772                ix
 773            );
 774        }
 775    }
 776
 777    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 778    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 779        let mut str = String::new();
 780        let mut chars = RandomCharIter::new(rng);
 781        loop {
 782            let ch = chars.next().unwrap();
 783            if str.len() + ch.len_utf8() > len {
 784                break;
 785            }
 786            str.push(ch);
 787        }
 788        str
 789    }
 790
 791    #[gpui::test(iterations = 1000)]
 792    fn test_append_random_strings(mut rng: StdRng) {
 793        let len1 = rng.random_range(0..=MAX_BASE);
 794        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 795        let str1 = random_string_with_utf8_len(&mut rng, len1);
 796        let str2 = random_string_with_utf8_len(&mut rng, len2);
 797        let mut chunk1 = Chunk::new(&str1);
 798        let chunk2 = Chunk::new(&str2);
 799        let char_offsets = char_offsets_with_end(&str2);
 800        let start_index = rng.random_range(0..char_offsets.len());
 801        let start_offset = char_offsets[start_index];
 802        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 803        chunk1.append(chunk2.slice(start_offset..end_offset));
 804        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 805    }
 806
 807    /// Return the byte offsets for each character in a string.
 808    ///
 809    /// These are valid offsets to split the string.
 810    fn char_offsets(text: &str) -> Vec<usize> {
 811        text.char_indices().map(|(i, _c)| i).collect()
 812    }
 813
 814    /// Return the byte offsets for each character in a string, plus the offset
 815    /// past the end of the string.
 816    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 817        let mut v = char_offsets(text);
 818        v.push(text.len());
 819        v
 820    }
 821
 822    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 823        let mut offset = 0;
 824        let mut offset_utf16 = OffsetUtf16(0);
 825        let mut point = Point::zero();
 826        let mut point_utf16 = PointUtf16::zero();
 827
 828        log::info!("Verifying chunk {:?}", text);
 829        assert_eq!(chunk.offset_to_point(0), Point::zero());
 830
 831        let mut expected_tab_positions = Vec::new();
 832
 833        for (char_offset, c) in text.chars().enumerate() {
 834            let expected_point = chunk.offset_to_point(offset);
 835            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 836            assert_eq!(
 837                chunk.point_to_offset(point),
 838                offset,
 839                "mismatch at point {:?}",
 840                point
 841            );
 842            assert_eq!(
 843                chunk.offset_to_offset_utf16(offset),
 844                offset_utf16,
 845                "mismatch at offset {}",
 846                offset
 847            );
 848            assert_eq!(
 849                chunk.offset_utf16_to_offset(offset_utf16),
 850                offset,
 851                "mismatch at offset_utf16 {:?}",
 852                offset_utf16
 853            );
 854            assert_eq!(
 855                chunk.point_to_point_utf16(point),
 856                point_utf16,
 857                "mismatch at point {:?}",
 858                point
 859            );
 860            assert_eq!(
 861                chunk.point_utf16_to_offset(point_utf16, false),
 862                offset,
 863                "mismatch at point_utf16 {:?}",
 864                point_utf16
 865            );
 866            assert_eq!(
 867                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 868                point,
 869                "mismatch for unclipped_point_utf16_to_point at {:?}",
 870                point_utf16
 871            );
 872
 873            assert_eq!(
 874                chunk.clip_point(point, Bias::Left),
 875                point,
 876                "incorrect left clip at {:?}",
 877                point
 878            );
 879            assert_eq!(
 880                chunk.clip_point(point, Bias::Right),
 881                point,
 882                "incorrect right clip at {:?}",
 883                point
 884            );
 885
 886            for i in 1..c.len_utf8() {
 887                let test_point = Point::new(point.row, point.column + i as u32);
 888                assert_eq!(
 889                    chunk.clip_point(test_point, Bias::Left),
 890                    point,
 891                    "incorrect left clip within multi-byte char at {:?}",
 892                    test_point
 893                );
 894                assert_eq!(
 895                    chunk.clip_point(test_point, Bias::Right),
 896                    Point::new(point.row, point.column + c.len_utf8() as u32),
 897                    "incorrect right clip within multi-byte char at {:?}",
 898                    test_point
 899                );
 900            }
 901
 902            for i in 1..c.len_utf16() {
 903                let test_point = Unclipped(PointUtf16::new(
 904                    point_utf16.row,
 905                    point_utf16.column + i as u32,
 906                ));
 907                assert_eq!(
 908                    chunk.unclipped_point_utf16_to_point(test_point),
 909                    point,
 910                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 911                    test_point
 912                );
 913                assert_eq!(
 914                    chunk.clip_point_utf16(test_point, Bias::Left),
 915                    point_utf16,
 916                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 917                    test_point
 918                );
 919                assert_eq!(
 920                    chunk.clip_point_utf16(test_point, Bias::Right),
 921                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 922                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 923                    test_point
 924                );
 925
 926                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 927                assert_eq!(
 928                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 929                    offset_utf16,
 930                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 931                    test_offset
 932                );
 933                assert_eq!(
 934                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 935                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 936                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 937                    test_offset
 938                );
 939            }
 940
 941            if c == '\n' {
 942                point.row += 1;
 943                point.column = 0;
 944                point_utf16.row += 1;
 945                point_utf16.column = 0;
 946            } else {
 947                point.column += c.len_utf8() as u32;
 948                point_utf16.column += c.len_utf16() as u32;
 949            }
 950
 951            if c == '\t' {
 952                expected_tab_positions.push(TabPosition {
 953                    byte_offset: offset,
 954                    char_offset,
 955                });
 956            }
 957
 958            offset += c.len_utf8();
 959            offset_utf16.0 += c.len_utf16();
 960        }
 961
 962        let final_point = chunk.offset_to_point(offset);
 963        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
 964        assert_eq!(
 965            chunk.point_to_offset(point),
 966            offset,
 967            "mismatch at point {:?}",
 968            point
 969        );
 970        assert_eq!(
 971            chunk.offset_to_offset_utf16(offset),
 972            offset_utf16,
 973            "mismatch at offset {}",
 974            offset
 975        );
 976        assert_eq!(
 977            chunk.offset_utf16_to_offset(offset_utf16),
 978            offset,
 979            "mismatch at offset_utf16 {:?}",
 980            offset_utf16
 981        );
 982        assert_eq!(
 983            chunk.point_to_point_utf16(point),
 984            point_utf16,
 985            "mismatch at final point {:?}",
 986            point
 987        );
 988        assert_eq!(
 989            chunk.point_utf16_to_offset(point_utf16, false),
 990            offset,
 991            "mismatch at final point_utf16 {:?}",
 992            point_utf16
 993        );
 994        assert_eq!(
 995            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 996            point,
 997            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
 998            point_utf16
 999        );
1000        assert_eq!(
1001            chunk.clip_point(point, Bias::Left),
1002            point,
1003            "incorrect left clip at final point {:?}",
1004            point
1005        );
1006        assert_eq!(
1007            chunk.clip_point(point, Bias::Right),
1008            point,
1009            "incorrect right clip at final point {:?}",
1010            point
1011        );
1012        assert_eq!(
1013            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1014            point_utf16,
1015            "incorrect left clip_point_utf16 at final point {:?}",
1016            point_utf16
1017        );
1018        assert_eq!(
1019            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1020            point_utf16,
1021            "incorrect right clip_point_utf16 at final point {:?}",
1022            point_utf16
1023        );
1024        assert_eq!(
1025            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1026            offset_utf16,
1027            "incorrect left clip_offset_utf16 at final offset {:?}",
1028            offset_utf16
1029        );
1030        assert_eq!(
1031            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1032            offset_utf16,
1033            "incorrect right clip_offset_utf16 at final offset {:?}",
1034            offset_utf16
1035        );
1036
1037        // Verify length methods
1038        assert_eq!(chunk.len(), text.len());
1039        assert_eq!(
1040            chunk.len_utf16().0,
1041            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1042        );
1043
1044        // Verify line counting
1045        let lines = chunk.lines();
1046        let mut newline_count = 0;
1047        let mut last_line_len = 0;
1048        for c in text.chars() {
1049            if c == '\n' {
1050                newline_count += 1;
1051                last_line_len = 0;
1052            } else {
1053                last_line_len += c.len_utf8() as u32;
1054            }
1055        }
1056        assert_eq!(lines, Point::new(newline_count, last_line_len));
1057
1058        // Verify first/last line chars
1059        if !text.is_empty() {
1060            let first_line = text.split('\n').next().unwrap();
1061            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1062
1063            let last_line = text.split('\n').next_back().unwrap();
1064            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1065            assert_eq!(
1066                chunk.last_line_len_utf16(),
1067                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1068            );
1069        }
1070
1071        // Verify longest row
1072        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1073        let mut max_chars = 0;
1074        let mut current_row = 0;
1075        let mut current_chars = 0;
1076        let mut max_row = 0;
1077
1078        for c in text.chars() {
1079            if c == '\n' {
1080                if current_chars > max_chars {
1081                    max_chars = current_chars;
1082                    max_row = current_row;
1083                }
1084                current_row += 1;
1085                current_chars = 0;
1086            } else {
1087                current_chars += 1;
1088            }
1089        }
1090
1091        if current_chars > max_chars {
1092            max_chars = current_chars;
1093            max_row = current_row;
1094        }
1095
1096        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1097        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1098    }
1099}