chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35impl Chunk {
  36    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  37
  38    #[inline(always)]
  39    pub fn new(text: &str) -> Self {
  40        let mut this = Chunk::default();
  41        this.push_str(text);
  42        this
  43    }
  44
  45    #[inline(always)]
  46    pub fn push_str(&mut self, text: &str) {
  47        for (char_ix, c) in text.char_indices() {
  48            let ix = self.text.len() + char_ix;
  49            self.chars |= 1 << ix;
  50            self.chars_utf16 |= 1 << ix;
  51            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  52            self.newlines |= ((c == '\n') as Bitmap) << ix;
  53            self.tabs |= ((c == '\t') as Bitmap) << ix;
  54        }
  55        self.text.push_str(text);
  56    }
  57
  58    #[inline(always)]
  59    pub fn append(&mut self, slice: ChunkSlice) {
  60        if slice.is_empty() {
  61            return;
  62        };
  63
  64        let base_ix = self.text.len();
  65        self.chars |= slice.chars << base_ix;
  66        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  67        self.newlines |= slice.newlines << base_ix;
  68        self.tabs |= slice.tabs << base_ix;
  69        self.text.push_str(slice.text);
  70    }
  71
  72    #[inline(always)]
  73    pub fn as_slice(&self) -> ChunkSlice<'_> {
  74        ChunkSlice {
  75            chars: self.chars,
  76            chars_utf16: self.chars_utf16,
  77            newlines: self.newlines,
  78            tabs: self.tabs,
  79            text: &self.text,
  80        }
  81    }
  82
  83    #[inline(always)]
  84    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  85        self.as_slice().slice(range)
  86    }
  87
  88    #[inline(always)]
  89    pub fn chars(&self) -> Bitmap {
  90        self.chars
  91    }
  92
  93    pub fn tabs(&self) -> Bitmap {
  94        self.tabs
  95    }
  96
  97    #[inline(always)]
  98    pub fn is_char_boundary(&self, offset: usize) -> bool {
  99        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 100    }
 101}
 102
 103#[derive(Clone, Copy, Debug)]
 104pub struct ChunkSlice<'a> {
 105    chars: Bitmap,
 106    chars_utf16: Bitmap,
 107    newlines: Bitmap,
 108    tabs: Bitmap,
 109    text: &'a str,
 110}
 111
 112impl Into<Chunk> for ChunkSlice<'_> {
 113    fn into(self) -> Chunk {
 114        Chunk {
 115            chars: self.chars,
 116            chars_utf16: self.chars_utf16,
 117            newlines: self.newlines,
 118            tabs: self.tabs,
 119            text: self.text.try_into().unwrap(),
 120        }
 121    }
 122}
 123
 124impl<'a> ChunkSlice<'a> {
 125    #[inline(always)]
 126    pub fn is_empty(&self) -> bool {
 127        self.text.is_empty()
 128    }
 129
 130    #[inline(always)]
 131    pub fn is_char_boundary(&self, offset: usize) -> bool {
 132        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 133    }
 134
 135    #[inline(always)]
 136    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 137        if mid == MAX_BASE {
 138            let left = self;
 139            let right = ChunkSlice {
 140                chars: 0,
 141                chars_utf16: 0,
 142                newlines: 0,
 143                tabs: 0,
 144                text: "",
 145            };
 146            (left, right)
 147        } else {
 148            let mask = ((1 as Bitmap) << mid) - 1;
 149            let (left_text, right_text) = self.text.split_at(mid);
 150            let left = ChunkSlice {
 151                chars: self.chars & mask,
 152                chars_utf16: self.chars_utf16 & mask,
 153                newlines: self.newlines & mask,
 154                tabs: self.tabs & mask,
 155                text: left_text,
 156            };
 157            let right = ChunkSlice {
 158                chars: self.chars >> mid,
 159                chars_utf16: self.chars_utf16 >> mid,
 160                newlines: self.newlines >> mid,
 161                tabs: self.tabs >> mid,
 162                text: right_text,
 163            };
 164            (left, right)
 165        }
 166    }
 167
 168    #[inline(always)]
 169    pub fn slice(self, range: Range<usize>) -> Self {
 170        debug_assert!(
 171            self.is_char_boundary(range.end),
 172            "Invalid range end {} in {:?}",
 173            range.end,
 174            self
 175        );
 176        let mask = (1 as Bitmap)
 177            .unbounded_shl(range.end as u32)
 178            .wrapping_sub(1);
 179        if range.start == MAX_BASE {
 180            Self {
 181                chars: 0,
 182                chars_utf16: 0,
 183                newlines: 0,
 184                tabs: 0,
 185                text: "",
 186            }
 187        } else {
 188            debug_assert!(
 189                self.is_char_boundary(range.start),
 190                "Invalid range start {} in {:?}",
 191                range.start,
 192                self
 193            );
 194            Self {
 195                chars: (self.chars & mask) >> range.start,
 196                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 197                newlines: (self.newlines & mask) >> range.start,
 198                tabs: (self.tabs & mask) >> range.start,
 199                text: &self.text[range],
 200            }
 201        }
 202    }
 203
 204    #[inline(always)]
 205    pub fn text_summary(&self) -> TextSummary {
 206        let mut chars = 0;
 207        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 208        TextSummary {
 209            len: self.len(),
 210            chars,
 211            len_utf16: self.len_utf16(),
 212            lines: self.lines(),
 213            first_line_chars: self.first_line_chars(),
 214            last_line_chars: self.last_line_chars(),
 215            last_line_len_utf16: self.last_line_len_utf16(),
 216            longest_row,
 217            longest_row_chars,
 218        }
 219    }
 220
 221    /// Get length in bytes
 222    #[inline(always)]
 223    pub fn len(&self) -> usize {
 224        self.text.len()
 225    }
 226
 227    /// Get length in UTF-16 code units
 228    #[inline(always)]
 229    pub fn len_utf16(&self) -> OffsetUtf16 {
 230        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 231    }
 232
 233    /// Get point representing number of lines and length of last line
 234    #[inline(always)]
 235    pub fn lines(&self) -> Point {
 236        let row = self.newlines.count_ones();
 237        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 238        Point::new(row, column)
 239    }
 240
 241    /// Get number of chars in first line
 242    #[inline(always)]
 243    pub fn first_line_chars(&self) -> u32 {
 244        if self.newlines == 0 {
 245            self.chars.count_ones()
 246        } else {
 247            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
 248            (self.chars & mask).count_ones()
 249        }
 250    }
 251
 252    /// Get number of chars in last line
 253    #[inline(always)]
 254    pub fn last_line_chars(&self) -> u32 {
 255        if self.newlines == 0 {
 256            self.chars.count_ones()
 257        } else {
 258            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 259            (self.chars & mask).count_ones()
 260        }
 261    }
 262
 263    /// Get number of UTF-16 code units in last line
 264    #[inline(always)]
 265    pub fn last_line_len_utf16(&self) -> u32 {
 266        if self.newlines == 0 {
 267            self.chars_utf16.count_ones()
 268        } else {
 269            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 270            (self.chars_utf16 & mask).count_ones()
 271        }
 272    }
 273
 274    /// Get the longest row in the chunk and its length in characters.
 275    /// Calculate the total number of characters in the chunk along the way.
 276    #[inline(always)]
 277    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 278        let mut chars = self.chars;
 279        let mut newlines = self.newlines;
 280        *total_chars = 0;
 281        let mut row = 0;
 282        let mut longest_row = 0;
 283        let mut longest_row_chars = 0;
 284        while newlines > 0 {
 285            let newline_ix = newlines.trailing_zeros();
 286            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 287            *total_chars += usize::from(row_chars);
 288            if row_chars > longest_row_chars {
 289                longest_row = row;
 290                longest_row_chars = row_chars;
 291            }
 292
 293            newlines >>= newline_ix;
 294            newlines >>= 1;
 295            chars >>= newline_ix;
 296            chars >>= 1;
 297            row += 1;
 298            *total_chars += 1;
 299        }
 300
 301        let row_chars = chars.count_ones() as u8;
 302        *total_chars += usize::from(row_chars);
 303        if row_chars > longest_row_chars {
 304            (row, row_chars as u32)
 305        } else {
 306            (longest_row, longest_row_chars as u32)
 307        }
 308    }
 309
 310    #[inline(always)]
 311    pub fn offset_to_point(&self, offset: usize) -> Point {
 312        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 313        let row = (self.newlines & mask).count_ones();
 314        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 315        let column = (offset - newline_ix as usize) as u32;
 316        Point::new(row, column)
 317    }
 318
 319    #[inline(always)]
 320    pub fn point_to_offset(&self, point: Point) -> usize {
 321        if point.row > self.lines().row {
 322            debug_panic!(
 323                "point {:?} extends beyond rows for string {:?}",
 324                point,
 325                self.text
 326            );
 327            return self.len();
 328        }
 329
 330        let row_offset_range = self.offset_range_for_row(point.row);
 331        if point.column > row_offset_range.len() as u32 {
 332            debug_panic!(
 333                "point {:?} extends beyond row for string {:?}",
 334                point,
 335                self.text
 336            );
 337            row_offset_range.end
 338        } else {
 339            row_offset_range.start + point.column as usize
 340        }
 341    }
 342
 343    #[inline(always)]
 344    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 345        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 346        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 347    }
 348
 349    #[inline(always)]
 350    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 351        if target.0 == 0 {
 352            0
 353        } else {
 354            #[cfg(not(test))]
 355            let chars_utf16 = self.chars_utf16;
 356            #[cfg(test)]
 357            let chars_utf16 = self.chars_utf16 as u128;
 358            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 359            if ix == MAX_BASE {
 360                MAX_BASE
 361            } else {
 362                let utf8_additional_len = cmp::min(
 363                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 364                    self.text.len() - ix,
 365                );
 366                ix + utf8_additional_len
 367            }
 368        }
 369    }
 370
 371    #[inline(always)]
 372    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 373        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 374        let row = (self.newlines & mask).count_ones();
 375        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 376        let column = if newline_ix as usize == MAX_BASE {
 377            0
 378        } else {
 379            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 380        };
 381        PointUtf16::new(row, column)
 382    }
 383
 384    #[inline(always)]
 385    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 386        self.offset_to_point_utf16(self.point_to_offset(point))
 387    }
 388
 389    #[inline(always)]
 390    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 391        let lines = self.lines();
 392        if point.row > lines.row {
 393            if !clip {
 394                debug_panic!(
 395                    "point {:?} is beyond this chunk's extent {:?}",
 396                    point,
 397                    self.text
 398                );
 399            }
 400            return self.len();
 401        }
 402
 403        let row_offset_range = self.offset_range_for_row(point.row);
 404        let line = self.slice(row_offset_range.clone());
 405        if point.column > line.last_line_len_utf16() {
 406            if !clip {
 407                debug_panic!(
 408                    "point {:?} is beyond the end of the line in chunk {:?}",
 409                    point,
 410                    self.text
 411                );
 412            }
 413            return line.len();
 414        }
 415
 416        let mut offset = row_offset_range.start;
 417        if point.column > 0 {
 418            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 419            if !self.text.is_char_boundary(offset) {
 420                offset -= 1;
 421                while !self.text.is_char_boundary(offset) {
 422                    offset -= 1;
 423                }
 424                if !clip {
 425                    debug_panic!(
 426                        "point {:?} is within character in chunk {:?}",
 427                        point,
 428                        self.text,
 429                    );
 430                }
 431            }
 432        }
 433        offset
 434    }
 435
 436    #[inline(always)]
 437    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 438        let max_point = self.lines();
 439        if point.0.row > max_point.row {
 440            return max_point;
 441        }
 442
 443        let row_offset_range = self.offset_range_for_row(point.0.row);
 444        let line = self.slice(row_offset_range);
 445        if point.0.column == 0 {
 446            Point::new(point.0.row, 0)
 447        } else if point.0.column >= line.len_utf16().0 as u32 {
 448            Point::new(point.0.row, line.len() as u32)
 449        } else {
 450            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 451            while !line.text.is_char_boundary(column) {
 452                column -= 1;
 453            }
 454            Point::new(point.0.row, column as u32)
 455        }
 456    }
 457
 458    #[inline(always)]
 459    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 460        let max_point = self.lines();
 461        if point.row > max_point.row {
 462            return max_point;
 463        }
 464
 465        let line = self.slice(self.offset_range_for_row(point.row));
 466        if point.column == 0 {
 467            point
 468        } else if point.column >= line.len() as u32 {
 469            Point::new(point.row, line.len() as u32)
 470        } else {
 471            let mut column = point.column as usize;
 472            let bytes = line.text.as_bytes();
 473            if bytes[column - 1] < 128 && bytes[column] < 128 {
 474                return Point::new(point.row, column as u32);
 475            }
 476
 477            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 478            loop {
 479                if line.is_char_boundary(column)
 480                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 481                {
 482                    break;
 483                }
 484
 485                match bias {
 486                    Bias::Left => column -= 1,
 487                    Bias::Right => column += 1,
 488                }
 489                grapheme_cursor.set_cursor(column);
 490            }
 491            Point::new(point.row, column as u32)
 492        }
 493    }
 494
 495    #[inline(always)]
 496    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 497        let max_point = self.lines();
 498        if point.0.row > max_point.row {
 499            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 500        } else {
 501            let line = self.slice(self.offset_range_for_row(point.0.row));
 502            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 503            PointUtf16::new(point.0.row, column.0 as u32)
 504        }
 505    }
 506
 507    #[inline(always)]
 508    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 509        if target == OffsetUtf16::default() {
 510            OffsetUtf16::default()
 511        } else if target >= self.len_utf16() {
 512            self.len_utf16()
 513        } else {
 514            let mut offset = self.offset_utf16_to_offset(target);
 515            while !self.text.is_char_boundary(offset) {
 516                if bias == Bias::Left {
 517                    offset -= 1;
 518                } else {
 519                    offset += 1;
 520                }
 521            }
 522            self.offset_to_offset_utf16(offset)
 523        }
 524    }
 525
 526    #[inline(always)]
 527    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 528        let row_start = if row > 0 {
 529            #[cfg(not(test))]
 530            let newlines = self.newlines;
 531            #[cfg(test)]
 532            let newlines = self.newlines as u128;
 533            nth_set_bit(newlines, row as usize) + 1
 534        } else {
 535            0
 536        };
 537        let row_len = if row_start == MAX_BASE {
 538            0
 539        } else {
 540            cmp::min(
 541                (self.newlines >> row_start).trailing_zeros(),
 542                (self.text.len() - row_start) as u32,
 543            )
 544        };
 545        row_start..row_start + row_len as usize
 546    }
 547
 548    #[inline(always)]
 549    pub fn tabs(&self) -> Tabs {
 550        Tabs {
 551            tabs: self.tabs,
 552            chars: self.chars,
 553        }
 554    }
 555}
 556
 557pub struct Tabs {
 558    tabs: Bitmap,
 559    chars: Bitmap,
 560}
 561
 562#[derive(Debug, PartialEq, Eq)]
 563pub struct TabPosition {
 564    pub byte_offset: usize,
 565    pub char_offset: usize,
 566}
 567
 568impl Iterator for Tabs {
 569    type Item = TabPosition;
 570
 571    fn next(&mut self) -> Option<Self::Item> {
 572        if self.tabs == 0 {
 573            return None;
 574        }
 575
 576        let tab_offset = self.tabs.trailing_zeros() as usize;
 577        let chars_mask = (1 << tab_offset) - 1;
 578        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 579
 580        // Since tabs are 1 byte the tab offset is the same as the byte offset
 581        let position = TabPosition {
 582            byte_offset: tab_offset,
 583            char_offset,
 584        };
 585        // Remove the tab we've just seen
 586        self.tabs ^= 1 << tab_offset;
 587
 588        Some(position)
 589    }
 590}
 591
 592/// Finds the n-th bit that is set to 1.
 593#[inline(always)]
 594fn nth_set_bit(v: u128, n: usize) -> usize {
 595    let low = v as u64;
 596    let high = (v >> 64) as u64;
 597
 598    let low_count = low.count_ones() as usize;
 599    if n > low_count {
 600        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 601    } else {
 602        nth_set_bit_u64(low, n as u64) as usize
 603    }
 604}
 605
 606#[inline(always)]
 607fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 608    let v = v.reverse_bits();
 609    let mut s: u64 = 64;
 610
 611    // Parallel bit count intermediates
 612    let a = v - ((v >> 1) & (u64::MAX / 3));
 613    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 614    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 615    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 616
 617    // Branchless select
 618    let t = (d >> 32) + (d >> 48);
 619    s -= (t.wrapping_sub(n) & 256) >> 3;
 620    n -= t & (t.wrapping_sub(n) >> 8);
 621
 622    let t = (d >> (s - 16)) & 0xff;
 623    s -= (t.wrapping_sub(n) & 256) >> 4;
 624    n -= t & (t.wrapping_sub(n) >> 8);
 625
 626    let t = (c >> (s - 8)) & 0xf;
 627    s -= (t.wrapping_sub(n) & 256) >> 5;
 628    n -= t & (t.wrapping_sub(n) >> 8);
 629
 630    let t = (b >> (s - 4)) & 0x7;
 631    s -= (t.wrapping_sub(n) & 256) >> 6;
 632    n -= t & (t.wrapping_sub(n) >> 8);
 633
 634    let t = (a >> (s - 2)) & 0x3;
 635    s -= (t.wrapping_sub(n) & 256) >> 7;
 636    n -= t & (t.wrapping_sub(n) >> 8);
 637
 638    let t = (v >> (s - 1)) & 0x1;
 639    s -= (t.wrapping_sub(n) & 256) >> 8;
 640
 641    65 - s - 1
 642}
 643
 644#[cfg(test)]
 645mod tests {
 646    use super::*;
 647    use rand::prelude::*;
 648    use util::RandomCharIter;
 649
 650    #[gpui::test(iterations = 100)]
 651    fn test_random_chunks(mut rng: StdRng) {
 652        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 653        log::info!("Chunk: {:?}", text);
 654        let chunk = Chunk::new(&text);
 655        verify_chunk(chunk.as_slice(), &text);
 656
 657        // Verify Chunk::chars() bitmap
 658        let expected_chars = char_offsets(&text)
 659            .into_iter()
 660            .inspect(|i| assert!(*i < MAX_BASE))
 661            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 662        assert_eq!(chunk.chars(), expected_chars);
 663
 664        for _ in 0..10 {
 665            let mut start = rng.random_range(0..=chunk.text.len());
 666            let mut end = rng.random_range(start..=chunk.text.len());
 667            while !chunk.text.is_char_boundary(start) {
 668                start -= 1;
 669            }
 670            while !chunk.text.is_char_boundary(end) {
 671                end -= 1;
 672            }
 673            let range = start..end;
 674            log::info!("Range: {:?}", range);
 675            let text_slice = &text[range.clone()];
 676            let chunk_slice = chunk.slice(range);
 677            verify_chunk(chunk_slice, text_slice);
 678        }
 679    }
 680
 681    #[gpui::test(iterations = 100)]
 682    fn test_split_chunk_slice(mut rng: StdRng) {
 683        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 684        let chunk = Chunk::new(text);
 685        let offset = char_offsets_with_end(text)
 686            .into_iter()
 687            .choose(&mut rng)
 688            .unwrap();
 689        let (a, b) = chunk.as_slice().split_at(offset);
 690        let (a_str, b_str) = text.split_at(offset);
 691        verify_chunk(a, a_str);
 692        verify_chunk(b, b_str);
 693    }
 694
 695    #[gpui::test(iterations = 1000)]
 696    fn test_nth_set_bit_random(mut rng: StdRng) {
 697        let set_count = rng.random_range(0..=128);
 698        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 699        set_bits.sort();
 700        let mut n = 0;
 701        for ix in set_bits.iter().copied() {
 702            n |= 1 << ix;
 703        }
 704
 705        for (mut ix, position) in set_bits.into_iter().enumerate() {
 706            ix += 1;
 707            assert_eq!(
 708                nth_set_bit(n, ix),
 709                position,
 710                "nth_set_bit({:0128b}, {})",
 711                n,
 712                ix
 713            );
 714        }
 715    }
 716
 717    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 718    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 719        let mut str = String::new();
 720        let mut chars = RandomCharIter::new(rng);
 721        loop {
 722            let ch = chars.next().unwrap();
 723            if str.len() + ch.len_utf8() > len {
 724                break;
 725            }
 726            str.push(ch);
 727        }
 728        str
 729    }
 730
 731    #[gpui::test(iterations = 1000)]
 732    fn test_append_random_strings(mut rng: StdRng) {
 733        let len1 = rng.random_range(0..=MAX_BASE);
 734        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 735        let str1 = random_string_with_utf8_len(&mut rng, len1);
 736        let str2 = random_string_with_utf8_len(&mut rng, len2);
 737        let mut chunk1 = Chunk::new(&str1);
 738        let chunk2 = Chunk::new(&str2);
 739        let char_offsets = char_offsets_with_end(&str2);
 740        let start_index = rng.random_range(0..char_offsets.len());
 741        let start_offset = char_offsets[start_index];
 742        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 743        chunk1.append(chunk2.slice(start_offset..end_offset));
 744        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 745    }
 746
 747    /// Return the byte offsets for each character in a string.
 748    ///
 749    /// These are valid offsets to split the string.
 750    fn char_offsets(text: &str) -> Vec<usize> {
 751        text.char_indices().map(|(i, _c)| i).collect()
 752    }
 753
 754    /// Return the byte offsets for each character in a string, plus the offset
 755    /// past the end of the string.
 756    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 757        let mut v = char_offsets(text);
 758        v.push(text.len());
 759        v
 760    }
 761
 762    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 763        let mut offset = 0;
 764        let mut offset_utf16 = OffsetUtf16(0);
 765        let mut point = Point::zero();
 766        let mut point_utf16 = PointUtf16::zero();
 767
 768        log::info!("Verifying chunk {:?}", text);
 769        assert_eq!(chunk.offset_to_point(0), Point::zero());
 770
 771        let mut expected_tab_positions = Vec::new();
 772
 773        for (char_offset, c) in text.chars().enumerate() {
 774            let expected_point = chunk.offset_to_point(offset);
 775            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 776            assert_eq!(
 777                chunk.point_to_offset(point),
 778                offset,
 779                "mismatch at point {:?}",
 780                point
 781            );
 782            assert_eq!(
 783                chunk.offset_to_offset_utf16(offset),
 784                offset_utf16,
 785                "mismatch at offset {}",
 786                offset
 787            );
 788            assert_eq!(
 789                chunk.offset_utf16_to_offset(offset_utf16),
 790                offset,
 791                "mismatch at offset_utf16 {:?}",
 792                offset_utf16
 793            );
 794            assert_eq!(
 795                chunk.point_to_point_utf16(point),
 796                point_utf16,
 797                "mismatch at point {:?}",
 798                point
 799            );
 800            assert_eq!(
 801                chunk.point_utf16_to_offset(point_utf16, false),
 802                offset,
 803                "mismatch at point_utf16 {:?}",
 804                point_utf16
 805            );
 806            assert_eq!(
 807                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 808                point,
 809                "mismatch for unclipped_point_utf16_to_point at {:?}",
 810                point_utf16
 811            );
 812
 813            assert_eq!(
 814                chunk.clip_point(point, Bias::Left),
 815                point,
 816                "incorrect left clip at {:?}",
 817                point
 818            );
 819            assert_eq!(
 820                chunk.clip_point(point, Bias::Right),
 821                point,
 822                "incorrect right clip at {:?}",
 823                point
 824            );
 825
 826            for i in 1..c.len_utf8() {
 827                let test_point = Point::new(point.row, point.column + i as u32);
 828                assert_eq!(
 829                    chunk.clip_point(test_point, Bias::Left),
 830                    point,
 831                    "incorrect left clip within multi-byte char at {:?}",
 832                    test_point
 833                );
 834                assert_eq!(
 835                    chunk.clip_point(test_point, Bias::Right),
 836                    Point::new(point.row, point.column + c.len_utf8() as u32),
 837                    "incorrect right clip within multi-byte char at {:?}",
 838                    test_point
 839                );
 840            }
 841
 842            for i in 1..c.len_utf16() {
 843                let test_point = Unclipped(PointUtf16::new(
 844                    point_utf16.row,
 845                    point_utf16.column + i as u32,
 846                ));
 847                assert_eq!(
 848                    chunk.unclipped_point_utf16_to_point(test_point),
 849                    point,
 850                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 851                    test_point
 852                );
 853                assert_eq!(
 854                    chunk.clip_point_utf16(test_point, Bias::Left),
 855                    point_utf16,
 856                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 857                    test_point
 858                );
 859                assert_eq!(
 860                    chunk.clip_point_utf16(test_point, Bias::Right),
 861                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 862                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 863                    test_point
 864                );
 865
 866                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 867                assert_eq!(
 868                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 869                    offset_utf16,
 870                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 871                    test_offset
 872                );
 873                assert_eq!(
 874                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 875                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 876                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 877                    test_offset
 878                );
 879            }
 880
 881            if c == '\n' {
 882                point.row += 1;
 883                point.column = 0;
 884                point_utf16.row += 1;
 885                point_utf16.column = 0;
 886            } else {
 887                point.column += c.len_utf8() as u32;
 888                point_utf16.column += c.len_utf16() as u32;
 889            }
 890
 891            if c == '\t' {
 892                expected_tab_positions.push(TabPosition {
 893                    byte_offset: offset,
 894                    char_offset,
 895                });
 896            }
 897
 898            offset += c.len_utf8();
 899            offset_utf16.0 += c.len_utf16();
 900        }
 901
 902        let final_point = chunk.offset_to_point(offset);
 903        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
 904        assert_eq!(
 905            chunk.point_to_offset(point),
 906            offset,
 907            "mismatch at point {:?}",
 908            point
 909        );
 910        assert_eq!(
 911            chunk.offset_to_offset_utf16(offset),
 912            offset_utf16,
 913            "mismatch at offset {}",
 914            offset
 915        );
 916        assert_eq!(
 917            chunk.offset_utf16_to_offset(offset_utf16),
 918            offset,
 919            "mismatch at offset_utf16 {:?}",
 920            offset_utf16
 921        );
 922        assert_eq!(
 923            chunk.point_to_point_utf16(point),
 924            point_utf16,
 925            "mismatch at final point {:?}",
 926            point
 927        );
 928        assert_eq!(
 929            chunk.point_utf16_to_offset(point_utf16, false),
 930            offset,
 931            "mismatch at final point_utf16 {:?}",
 932            point_utf16
 933        );
 934        assert_eq!(
 935            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 936            point,
 937            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
 938            point_utf16
 939        );
 940        assert_eq!(
 941            chunk.clip_point(point, Bias::Left),
 942            point,
 943            "incorrect left clip at final point {:?}",
 944            point
 945        );
 946        assert_eq!(
 947            chunk.clip_point(point, Bias::Right),
 948            point,
 949            "incorrect right clip at final point {:?}",
 950            point
 951        );
 952        assert_eq!(
 953            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
 954            point_utf16,
 955            "incorrect left clip_point_utf16 at final point {:?}",
 956            point_utf16
 957        );
 958        assert_eq!(
 959            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
 960            point_utf16,
 961            "incorrect right clip_point_utf16 at final point {:?}",
 962            point_utf16
 963        );
 964        assert_eq!(
 965            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
 966            offset_utf16,
 967            "incorrect left clip_offset_utf16 at final offset {:?}",
 968            offset_utf16
 969        );
 970        assert_eq!(
 971            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
 972            offset_utf16,
 973            "incorrect right clip_offset_utf16 at final offset {:?}",
 974            offset_utf16
 975        );
 976
 977        // Verify length methods
 978        assert_eq!(chunk.len(), text.len());
 979        assert_eq!(
 980            chunk.len_utf16().0,
 981            text.chars().map(|c| c.len_utf16()).sum::<usize>()
 982        );
 983
 984        // Verify line counting
 985        let lines = chunk.lines();
 986        let mut newline_count = 0;
 987        let mut last_line_len = 0;
 988        for c in text.chars() {
 989            if c == '\n' {
 990                newline_count += 1;
 991                last_line_len = 0;
 992            } else {
 993                last_line_len += c.len_utf8() as u32;
 994            }
 995        }
 996        assert_eq!(lines, Point::new(newline_count, last_line_len));
 997
 998        // Verify first/last line chars
 999        if !text.is_empty() {
1000            let first_line = text.split('\n').next().unwrap();
1001            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1002
1003            let last_line = text.split('\n').next_back().unwrap();
1004            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1005            assert_eq!(
1006                chunk.last_line_len_utf16(),
1007                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1008            );
1009        }
1010
1011        // Verify longest row
1012        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1013        let mut max_chars = 0;
1014        let mut current_row = 0;
1015        let mut current_chars = 0;
1016        let mut max_row = 0;
1017
1018        for c in text.chars() {
1019            if c == '\n' {
1020                if current_chars > max_chars {
1021                    max_chars = current_chars;
1022                    max_row = current_row;
1023                }
1024                current_row += 1;
1025                current_chars = 0;
1026            } else {
1027                current_chars += 1;
1028            }
1029        }
1030
1031        if current_chars > max_chars {
1032            max_chars = current_chars;
1033            max_row = current_row;
1034        }
1035
1036        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1037        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1038    }
1039}