chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35impl Chunk {
  36    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  37
  38    #[inline(always)]
  39    pub fn new(text: &str) -> Self {
  40        let mut this = Chunk::default();
  41        this.push_str(text);
  42        this
  43    }
  44
  45    #[inline(always)]
  46    pub fn push_str(&mut self, text: &str) {
  47        for (char_ix, c) in text.char_indices() {
  48            let ix = self.text.len() + char_ix;
  49            self.chars |= 1 << ix;
  50            self.chars_utf16 |= 1 << ix;
  51            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  52            self.newlines |= ((c == '\n') as Bitmap) << ix;
  53            self.tabs |= ((c == '\t') as Bitmap) << ix;
  54        }
  55        self.text.push_str(text);
  56    }
  57
  58    #[inline(always)]
  59    pub fn append(&mut self, slice: ChunkSlice) {
  60        if slice.is_empty() {
  61            return;
  62        };
  63
  64        let base_ix = self.text.len();
  65        self.chars |= slice.chars << base_ix;
  66        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  67        self.newlines |= slice.newlines << base_ix;
  68        self.tabs |= slice.tabs << base_ix;
  69        self.text.push_str(slice.text);
  70    }
  71
  72    #[inline(always)]
  73    pub fn as_slice(&self) -> ChunkSlice<'_> {
  74        ChunkSlice {
  75            chars: self.chars,
  76            chars_utf16: self.chars_utf16,
  77            newlines: self.newlines,
  78            tabs: self.tabs,
  79            text: &self.text,
  80        }
  81    }
  82
  83    #[inline(always)]
  84    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  85        self.as_slice().slice(range)
  86    }
  87
  88    #[inline(always)]
  89    pub fn chars(&self) -> Bitmap {
  90        self.chars
  91    }
  92
  93    pub fn tabs(&self) -> Bitmap {
  94        self.tabs
  95    }
  96}
  97
  98#[derive(Clone, Copy, Debug)]
  99pub struct ChunkSlice<'a> {
 100    chars: Bitmap,
 101    chars_utf16: Bitmap,
 102    newlines: Bitmap,
 103    tabs: Bitmap,
 104    text: &'a str,
 105}
 106
 107impl Into<Chunk> for ChunkSlice<'_> {
 108    fn into(self) -> Chunk {
 109        Chunk {
 110            chars: self.chars,
 111            chars_utf16: self.chars_utf16,
 112            newlines: self.newlines,
 113            tabs: self.tabs,
 114            text: self.text.try_into().unwrap(),
 115        }
 116    }
 117}
 118
 119impl<'a> ChunkSlice<'a> {
 120    #[inline(always)]
 121    pub fn is_empty(&self) -> bool {
 122        self.text.is_empty()
 123    }
 124
 125    #[inline(always)]
 126    pub fn is_char_boundary(self, offset: usize) -> bool {
 127        self.text.is_char_boundary(offset)
 128    }
 129
 130    #[inline(always)]
 131    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 132        if mid == MAX_BASE {
 133            let left = self;
 134            let right = ChunkSlice {
 135                chars: 0,
 136                chars_utf16: 0,
 137                newlines: 0,
 138                tabs: 0,
 139                text: "",
 140            };
 141            (left, right)
 142        } else {
 143            let mask = ((1 as Bitmap) << mid) - 1;
 144            let (left_text, right_text) = self.text.split_at(mid);
 145            let left = ChunkSlice {
 146                chars: self.chars & mask,
 147                chars_utf16: self.chars_utf16 & mask,
 148                newlines: self.newlines & mask,
 149                tabs: self.tabs & mask,
 150                text: left_text,
 151            };
 152            let right = ChunkSlice {
 153                chars: self.chars >> mid,
 154                chars_utf16: self.chars_utf16 >> mid,
 155                newlines: self.newlines >> mid,
 156                tabs: self.tabs >> mid,
 157                text: right_text,
 158            };
 159            (left, right)
 160        }
 161    }
 162
 163    #[inline(always)]
 164    pub fn slice(self, range: Range<usize>) -> Self {
 165        debug_assert!(
 166            self.is_char_boundary(range.end),
 167            "Invalid range end {} in {:?}",
 168            range.end,
 169            self
 170        );
 171        let mask = (1 as Bitmap)
 172            .unbounded_shl(range.end as u32)
 173            .wrapping_sub(1);
 174        if range.start == MAX_BASE {
 175            Self {
 176                chars: 0,
 177                chars_utf16: 0,
 178                newlines: 0,
 179                tabs: 0,
 180                text: "",
 181            }
 182        } else {
 183            debug_assert!(
 184                self.is_char_boundary(range.start),
 185                "Invalid range start {} in {:?}",
 186                range.start,
 187                self
 188            );
 189            Self {
 190                chars: (self.chars & mask) >> range.start,
 191                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 192                newlines: (self.newlines & mask) >> range.start,
 193                tabs: (self.tabs & mask) >> range.start,
 194                text: &self.text[range],
 195            }
 196        }
 197    }
 198
 199    #[inline(always)]
 200    pub fn text_summary(&self) -> TextSummary {
 201        let mut chars = 0;
 202        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 203        TextSummary {
 204            len: self.len(),
 205            chars,
 206            len_utf16: self.len_utf16(),
 207            lines: self.lines(),
 208            first_line_chars: self.first_line_chars(),
 209            last_line_chars: self.last_line_chars(),
 210            last_line_len_utf16: self.last_line_len_utf16(),
 211            longest_row,
 212            longest_row_chars,
 213        }
 214    }
 215
 216    /// Get length in bytes
 217    #[inline(always)]
 218    pub fn len(&self) -> usize {
 219        self.text.len()
 220    }
 221
 222    /// Get length in UTF-16 code units
 223    #[inline(always)]
 224    pub fn len_utf16(&self) -> OffsetUtf16 {
 225        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 226    }
 227
 228    /// Get point representing number of lines and length of last line
 229    #[inline(always)]
 230    pub fn lines(&self) -> Point {
 231        let row = self.newlines.count_ones();
 232        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 233        Point::new(row, column)
 234    }
 235
 236    /// Get number of chars in first line
 237    #[inline(always)]
 238    pub fn first_line_chars(&self) -> u32 {
 239        if self.newlines == 0 {
 240            self.chars.count_ones()
 241        } else {
 242            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
 243            (self.chars & mask).count_ones()
 244        }
 245    }
 246
 247    /// Get number of chars in last line
 248    #[inline(always)]
 249    pub fn last_line_chars(&self) -> u32 {
 250        if self.newlines == 0 {
 251            self.chars.count_ones()
 252        } else {
 253            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 254            (self.chars & mask).count_ones()
 255        }
 256    }
 257
 258    /// Get number of UTF-16 code units in last line
 259    #[inline(always)]
 260    pub fn last_line_len_utf16(&self) -> u32 {
 261        if self.newlines == 0 {
 262            self.chars_utf16.count_ones()
 263        } else {
 264            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 265            (self.chars_utf16 & mask).count_ones()
 266        }
 267    }
 268
 269    /// Get the longest row in the chunk and its length in characters.
 270    /// Calculate the total number of characters in the chunk along the way.
 271    #[inline(always)]
 272    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 273        let mut chars = self.chars;
 274        let mut newlines = self.newlines;
 275        *total_chars = 0;
 276        let mut row = 0;
 277        let mut longest_row = 0;
 278        let mut longest_row_chars = 0;
 279        while newlines > 0 {
 280            let newline_ix = newlines.trailing_zeros();
 281            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 282            *total_chars += usize::from(row_chars);
 283            if row_chars > longest_row_chars {
 284                longest_row = row;
 285                longest_row_chars = row_chars;
 286            }
 287
 288            newlines >>= newline_ix;
 289            newlines >>= 1;
 290            chars >>= newline_ix;
 291            chars >>= 1;
 292            row += 1;
 293            *total_chars += 1;
 294        }
 295
 296        let row_chars = chars.count_ones() as u8;
 297        *total_chars += usize::from(row_chars);
 298        if row_chars > longest_row_chars {
 299            (row, row_chars as u32)
 300        } else {
 301            (longest_row, longest_row_chars as u32)
 302        }
 303    }
 304
 305    #[inline(always)]
 306    pub fn offset_to_point(&self, offset: usize) -> Point {
 307        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 308        let row = (self.newlines & mask).count_ones();
 309        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 310        let column = (offset - newline_ix as usize) as u32;
 311        Point::new(row, column)
 312    }
 313
 314    #[inline(always)]
 315    pub fn point_to_offset(&self, point: Point) -> usize {
 316        if point.row > self.lines().row {
 317            debug_panic!(
 318                "point {:?} extends beyond rows for string {:?}",
 319                point,
 320                self.text
 321            );
 322            return self.len();
 323        }
 324
 325        let row_offset_range = self.offset_range_for_row(point.row);
 326        if point.column > row_offset_range.len() as u32 {
 327            debug_panic!(
 328                "point {:?} extends beyond row for string {:?}",
 329                point,
 330                self.text
 331            );
 332            row_offset_range.end
 333        } else {
 334            row_offset_range.start + point.column as usize
 335        }
 336    }
 337
 338    #[inline(always)]
 339    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 340        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 341        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 342    }
 343
 344    #[inline(always)]
 345    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 346        if target.0 == 0 {
 347            0
 348        } else {
 349            #[cfg(not(test))]
 350            let chars_utf16 = self.chars_utf16;
 351            #[cfg(test)]
 352            let chars_utf16 = self.chars_utf16 as u128;
 353            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 354            if ix == MAX_BASE {
 355                MAX_BASE
 356            } else {
 357                let utf8_additional_len = cmp::min(
 358                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 359                    self.text.len() - ix,
 360                );
 361                ix + utf8_additional_len
 362            }
 363        }
 364    }
 365
 366    #[inline(always)]
 367    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 368        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 369        let row = (self.newlines & mask).count_ones();
 370        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 371        let column = if newline_ix as usize == MAX_BASE {
 372            0
 373        } else {
 374            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 375        };
 376        PointUtf16::new(row, column)
 377    }
 378
 379    #[inline(always)]
 380    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 381        self.offset_to_point_utf16(self.point_to_offset(point))
 382    }
 383
 384    #[inline(always)]
 385    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 386        let lines = self.lines();
 387        if point.row > lines.row {
 388            if !clip {
 389                debug_panic!(
 390                    "point {:?} is beyond this chunk's extent {:?}",
 391                    point,
 392                    self.text
 393                );
 394            }
 395            return self.len();
 396        }
 397
 398        let row_offset_range = self.offset_range_for_row(point.row);
 399        let line = self.slice(row_offset_range.clone());
 400        if point.column > line.last_line_len_utf16() {
 401            if !clip {
 402                debug_panic!(
 403                    "point {:?} is beyond the end of the line in chunk {:?}",
 404                    point,
 405                    self.text
 406                );
 407            }
 408            return line.len();
 409        }
 410
 411        let mut offset = row_offset_range.start;
 412        if point.column > 0 {
 413            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 414            if !self.text.is_char_boundary(offset) {
 415                offset -= 1;
 416                while !self.text.is_char_boundary(offset) {
 417                    offset -= 1;
 418                }
 419                if !clip {
 420                    debug_panic!(
 421                        "point {:?} is within character in chunk {:?}",
 422                        point,
 423                        self.text,
 424                    );
 425                }
 426            }
 427        }
 428        offset
 429    }
 430
 431    #[inline(always)]
 432    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 433        let max_point = self.lines();
 434        if point.0.row > max_point.row {
 435            return max_point;
 436        }
 437
 438        let row_offset_range = self.offset_range_for_row(point.0.row);
 439        let line = self.slice(row_offset_range);
 440        if point.0.column == 0 {
 441            Point::new(point.0.row, 0)
 442        } else if point.0.column >= line.len_utf16().0 as u32 {
 443            Point::new(point.0.row, line.len() as u32)
 444        } else {
 445            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 446            while !line.text.is_char_boundary(column) {
 447                column -= 1;
 448            }
 449            Point::new(point.0.row, column as u32)
 450        }
 451    }
 452
 453    #[inline(always)]
 454    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 455        let max_point = self.lines();
 456        if point.row > max_point.row {
 457            return max_point;
 458        }
 459
 460        let line = self.slice(self.offset_range_for_row(point.row));
 461        if point.column == 0 {
 462            point
 463        } else if point.column >= line.len() as u32 {
 464            Point::new(point.row, line.len() as u32)
 465        } else {
 466            let mut column = point.column as usize;
 467            let bytes = line.text.as_bytes();
 468            if bytes[column - 1] < 128 && bytes[column] < 128 {
 469                return Point::new(point.row, column as u32);
 470            }
 471
 472            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 473            loop {
 474                if line.is_char_boundary(column)
 475                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 476                {
 477                    break;
 478                }
 479
 480                match bias {
 481                    Bias::Left => column -= 1,
 482                    Bias::Right => column += 1,
 483                }
 484                grapheme_cursor.set_cursor(column);
 485            }
 486            Point::new(point.row, column as u32)
 487        }
 488    }
 489
 490    #[inline(always)]
 491    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 492        let max_point = self.lines();
 493        if point.0.row > max_point.row {
 494            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 495        } else {
 496            let line = self.slice(self.offset_range_for_row(point.0.row));
 497            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 498            PointUtf16::new(point.0.row, column.0 as u32)
 499        }
 500    }
 501
 502    #[inline(always)]
 503    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 504        if target == OffsetUtf16::default() {
 505            OffsetUtf16::default()
 506        } else if target >= self.len_utf16() {
 507            self.len_utf16()
 508        } else {
 509            let mut offset = self.offset_utf16_to_offset(target);
 510            while !self.text.is_char_boundary(offset) {
 511                if bias == Bias::Left {
 512                    offset -= 1;
 513                } else {
 514                    offset += 1;
 515                }
 516            }
 517            self.offset_to_offset_utf16(offset)
 518        }
 519    }
 520
 521    #[inline(always)]
 522    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 523        let row_start = if row > 0 {
 524            #[cfg(not(test))]
 525            let newlines = self.newlines;
 526            #[cfg(test)]
 527            let newlines = self.newlines as u128;
 528            nth_set_bit(newlines, row as usize) + 1
 529        } else {
 530            0
 531        };
 532        let row_len = if row_start == MAX_BASE {
 533            0
 534        } else {
 535            cmp::min(
 536                (self.newlines >> row_start).trailing_zeros(),
 537                (self.text.len() - row_start) as u32,
 538            )
 539        };
 540        row_start..row_start + row_len as usize
 541    }
 542
 543    #[inline(always)]
 544    pub fn tabs(&self) -> Tabs {
 545        Tabs {
 546            tabs: self.tabs,
 547            chars: self.chars,
 548        }
 549    }
 550}
 551
 552pub struct Tabs {
 553    tabs: Bitmap,
 554    chars: Bitmap,
 555}
 556
 557#[derive(Debug, PartialEq, Eq)]
 558pub struct TabPosition {
 559    pub byte_offset: usize,
 560    pub char_offset: usize,
 561}
 562
 563impl Iterator for Tabs {
 564    type Item = TabPosition;
 565
 566    fn next(&mut self) -> Option<Self::Item> {
 567        if self.tabs == 0 {
 568            return None;
 569        }
 570
 571        let tab_offset = self.tabs.trailing_zeros() as usize;
 572        let chars_mask = (1 << tab_offset) - 1;
 573        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 574
 575        // Since tabs are 1 byte the tab offset is the same as the byte offset
 576        let position = TabPosition {
 577            byte_offset: tab_offset,
 578            char_offset,
 579        };
 580        // Remove the tab we've just seen
 581        self.tabs ^= 1 << tab_offset;
 582
 583        Some(position)
 584    }
 585}
 586
 587/// Finds the n-th bit that is set to 1.
 588#[inline(always)]
 589fn nth_set_bit(v: u128, n: usize) -> usize {
 590    let low = v as u64;
 591    let high = (v >> 64) as u64;
 592
 593    let low_count = low.count_ones() as usize;
 594    if n > low_count {
 595        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 596    } else {
 597        nth_set_bit_u64(low, n as u64) as usize
 598    }
 599}
 600
 601#[inline(always)]
 602fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 603    let v = v.reverse_bits();
 604    let mut s: u64 = 64;
 605
 606    // Parallel bit count intermediates
 607    let a = v - ((v >> 1) & (u64::MAX / 3));
 608    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 609    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 610    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 611
 612    // Branchless select
 613    let t = (d >> 32) + (d >> 48);
 614    s -= (t.wrapping_sub(n) & 256) >> 3;
 615    n -= t & (t.wrapping_sub(n) >> 8);
 616
 617    let t = (d >> (s - 16)) & 0xff;
 618    s -= (t.wrapping_sub(n) & 256) >> 4;
 619    n -= t & (t.wrapping_sub(n) >> 8);
 620
 621    let t = (c >> (s - 8)) & 0xf;
 622    s -= (t.wrapping_sub(n) & 256) >> 5;
 623    n -= t & (t.wrapping_sub(n) >> 8);
 624
 625    let t = (b >> (s - 4)) & 0x7;
 626    s -= (t.wrapping_sub(n) & 256) >> 6;
 627    n -= t & (t.wrapping_sub(n) >> 8);
 628
 629    let t = (a >> (s - 2)) & 0x3;
 630    s -= (t.wrapping_sub(n) & 256) >> 7;
 631    n -= t & (t.wrapping_sub(n) >> 8);
 632
 633    let t = (v >> (s - 1)) & 0x1;
 634    s -= (t.wrapping_sub(n) & 256) >> 8;
 635
 636    65 - s - 1
 637}
 638
 639#[cfg(test)]
 640mod tests {
 641    use super::*;
 642    use rand::prelude::*;
 643    use util::RandomCharIter;
 644
 645    #[gpui::test(iterations = 100)]
 646    fn test_random_chunks(mut rng: StdRng) {
 647        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 648        log::info!("Chunk: {:?}", text);
 649        let chunk = Chunk::new(&text);
 650        verify_chunk(chunk.as_slice(), &text);
 651
 652        // Verify Chunk::chars() bitmap
 653        let expected_chars = char_offsets(&text)
 654            .into_iter()
 655            .inspect(|i| assert!(*i < MAX_BASE))
 656            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 657        assert_eq!(chunk.chars(), expected_chars);
 658
 659        for _ in 0..10 {
 660            let mut start = rng.random_range(0..=chunk.text.len());
 661            let mut end = rng.random_range(start..=chunk.text.len());
 662            while !chunk.text.is_char_boundary(start) {
 663                start -= 1;
 664            }
 665            while !chunk.text.is_char_boundary(end) {
 666                end -= 1;
 667            }
 668            let range = start..end;
 669            log::info!("Range: {:?}", range);
 670            let text_slice = &text[range.clone()];
 671            let chunk_slice = chunk.slice(range);
 672            verify_chunk(chunk_slice, text_slice);
 673        }
 674    }
 675
 676    #[gpui::test(iterations = 100)]
 677    fn test_split_chunk_slice(mut rng: StdRng) {
 678        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 679        let chunk = Chunk::new(text);
 680        let offset = char_offsets_with_end(text)
 681            .into_iter()
 682            .choose(&mut rng)
 683            .unwrap();
 684        let (a, b) = chunk.as_slice().split_at(offset);
 685        let (a_str, b_str) = text.split_at(offset);
 686        verify_chunk(a, a_str);
 687        verify_chunk(b, b_str);
 688    }
 689
 690    #[gpui::test(iterations = 1000)]
 691    fn test_nth_set_bit_random(mut rng: StdRng) {
 692        let set_count = rng.random_range(0..=128);
 693        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 694        set_bits.sort();
 695        let mut n = 0;
 696        for ix in set_bits.iter().copied() {
 697            n |= 1 << ix;
 698        }
 699
 700        for (mut ix, position) in set_bits.into_iter().enumerate() {
 701            ix += 1;
 702            assert_eq!(
 703                nth_set_bit(n, ix),
 704                position,
 705                "nth_set_bit({:0128b}, {})",
 706                n,
 707                ix
 708            );
 709        }
 710    }
 711
 712    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 713    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 714        let mut str = String::new();
 715        let mut chars = RandomCharIter::new(rng);
 716        loop {
 717            let ch = chars.next().unwrap();
 718            if str.len() + ch.len_utf8() > len {
 719                break;
 720            }
 721            str.push(ch);
 722        }
 723        str
 724    }
 725
 726    #[gpui::test(iterations = 1000)]
 727    fn test_append_random_strings(mut rng: StdRng) {
 728        let len1 = rng.random_range(0..=MAX_BASE);
 729        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 730        let str1 = random_string_with_utf8_len(&mut rng, len1);
 731        let str2 = random_string_with_utf8_len(&mut rng, len2);
 732        let mut chunk1 = Chunk::new(&str1);
 733        let chunk2 = Chunk::new(&str2);
 734        let char_offsets = char_offsets_with_end(&str2);
 735        let start_index = rng.random_range(0..char_offsets.len());
 736        let start_offset = char_offsets[start_index];
 737        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 738        chunk1.append(chunk2.slice(start_offset..end_offset));
 739        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 740    }
 741
 742    /// Return the byte offsets for each character in a string.
 743    ///
 744    /// These are valid offsets to split the string.
 745    fn char_offsets(text: &str) -> Vec<usize> {
 746        text.char_indices().map(|(i, _c)| i).collect()
 747    }
 748
 749    /// Return the byte offsets for each character in a string, plus the offset
 750    /// past the end of the string.
 751    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 752        let mut v = char_offsets(text);
 753        v.push(text.len());
 754        v
 755    }
 756
 757    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 758        let mut offset = 0;
 759        let mut offset_utf16 = OffsetUtf16(0);
 760        let mut point = Point::zero();
 761        let mut point_utf16 = PointUtf16::zero();
 762
 763        log::info!("Verifying chunk {:?}", text);
 764        assert_eq!(chunk.offset_to_point(0), Point::zero());
 765
 766        let mut expected_tab_positions = Vec::new();
 767
 768        for (char_offset, c) in text.chars().enumerate() {
 769            let expected_point = chunk.offset_to_point(offset);
 770            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 771            assert_eq!(
 772                chunk.point_to_offset(point),
 773                offset,
 774                "mismatch at point {:?}",
 775                point
 776            );
 777            assert_eq!(
 778                chunk.offset_to_offset_utf16(offset),
 779                offset_utf16,
 780                "mismatch at offset {}",
 781                offset
 782            );
 783            assert_eq!(
 784                chunk.offset_utf16_to_offset(offset_utf16),
 785                offset,
 786                "mismatch at offset_utf16 {:?}",
 787                offset_utf16
 788            );
 789            assert_eq!(
 790                chunk.point_to_point_utf16(point),
 791                point_utf16,
 792                "mismatch at point {:?}",
 793                point
 794            );
 795            assert_eq!(
 796                chunk.point_utf16_to_offset(point_utf16, false),
 797                offset,
 798                "mismatch at point_utf16 {:?}",
 799                point_utf16
 800            );
 801            assert_eq!(
 802                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 803                point,
 804                "mismatch for unclipped_point_utf16_to_point at {:?}",
 805                point_utf16
 806            );
 807
 808            assert_eq!(
 809                chunk.clip_point(point, Bias::Left),
 810                point,
 811                "incorrect left clip at {:?}",
 812                point
 813            );
 814            assert_eq!(
 815                chunk.clip_point(point, Bias::Right),
 816                point,
 817                "incorrect right clip at {:?}",
 818                point
 819            );
 820
 821            for i in 1..c.len_utf8() {
 822                let test_point = Point::new(point.row, point.column + i as u32);
 823                assert_eq!(
 824                    chunk.clip_point(test_point, Bias::Left),
 825                    point,
 826                    "incorrect left clip within multi-byte char at {:?}",
 827                    test_point
 828                );
 829                assert_eq!(
 830                    chunk.clip_point(test_point, Bias::Right),
 831                    Point::new(point.row, point.column + c.len_utf8() as u32),
 832                    "incorrect right clip within multi-byte char at {:?}",
 833                    test_point
 834                );
 835            }
 836
 837            for i in 1..c.len_utf16() {
 838                let test_point = Unclipped(PointUtf16::new(
 839                    point_utf16.row,
 840                    point_utf16.column + i as u32,
 841                ));
 842                assert_eq!(
 843                    chunk.unclipped_point_utf16_to_point(test_point),
 844                    point,
 845                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 846                    test_point
 847                );
 848                assert_eq!(
 849                    chunk.clip_point_utf16(test_point, Bias::Left),
 850                    point_utf16,
 851                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 852                    test_point
 853                );
 854                assert_eq!(
 855                    chunk.clip_point_utf16(test_point, Bias::Right),
 856                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 857                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 858                    test_point
 859                );
 860
 861                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 862                assert_eq!(
 863                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 864                    offset_utf16,
 865                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 866                    test_offset
 867                );
 868                assert_eq!(
 869                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 870                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 871                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 872                    test_offset
 873                );
 874            }
 875
 876            if c == '\n' {
 877                point.row += 1;
 878                point.column = 0;
 879                point_utf16.row += 1;
 880                point_utf16.column = 0;
 881            } else {
 882                point.column += c.len_utf8() as u32;
 883                point_utf16.column += c.len_utf16() as u32;
 884            }
 885
 886            if c == '\t' {
 887                expected_tab_positions.push(TabPosition {
 888                    byte_offset: offset,
 889                    char_offset,
 890                });
 891            }
 892
 893            offset += c.len_utf8();
 894            offset_utf16.0 += c.len_utf16();
 895        }
 896
 897        let final_point = chunk.offset_to_point(offset);
 898        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
 899        assert_eq!(
 900            chunk.point_to_offset(point),
 901            offset,
 902            "mismatch at point {:?}",
 903            point
 904        );
 905        assert_eq!(
 906            chunk.offset_to_offset_utf16(offset),
 907            offset_utf16,
 908            "mismatch at offset {}",
 909            offset
 910        );
 911        assert_eq!(
 912            chunk.offset_utf16_to_offset(offset_utf16),
 913            offset,
 914            "mismatch at offset_utf16 {:?}",
 915            offset_utf16
 916        );
 917        assert_eq!(
 918            chunk.point_to_point_utf16(point),
 919            point_utf16,
 920            "mismatch at final point {:?}",
 921            point
 922        );
 923        assert_eq!(
 924            chunk.point_utf16_to_offset(point_utf16, false),
 925            offset,
 926            "mismatch at final point_utf16 {:?}",
 927            point_utf16
 928        );
 929        assert_eq!(
 930            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 931            point,
 932            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
 933            point_utf16
 934        );
 935        assert_eq!(
 936            chunk.clip_point(point, Bias::Left),
 937            point,
 938            "incorrect left clip at final point {:?}",
 939            point
 940        );
 941        assert_eq!(
 942            chunk.clip_point(point, Bias::Right),
 943            point,
 944            "incorrect right clip at final point {:?}",
 945            point
 946        );
 947        assert_eq!(
 948            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
 949            point_utf16,
 950            "incorrect left clip_point_utf16 at final point {:?}",
 951            point_utf16
 952        );
 953        assert_eq!(
 954            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
 955            point_utf16,
 956            "incorrect right clip_point_utf16 at final point {:?}",
 957            point_utf16
 958        );
 959        assert_eq!(
 960            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
 961            offset_utf16,
 962            "incorrect left clip_offset_utf16 at final offset {:?}",
 963            offset_utf16
 964        );
 965        assert_eq!(
 966            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
 967            offset_utf16,
 968            "incorrect right clip_offset_utf16 at final offset {:?}",
 969            offset_utf16
 970        );
 971
 972        // Verify length methods
 973        assert_eq!(chunk.len(), text.len());
 974        assert_eq!(
 975            chunk.len_utf16().0,
 976            text.chars().map(|c| c.len_utf16()).sum::<usize>()
 977        );
 978
 979        // Verify line counting
 980        let lines = chunk.lines();
 981        let mut newline_count = 0;
 982        let mut last_line_len = 0;
 983        for c in text.chars() {
 984            if c == '\n' {
 985                newline_count += 1;
 986                last_line_len = 0;
 987            } else {
 988                last_line_len += c.len_utf8() as u32;
 989            }
 990        }
 991        assert_eq!(lines, Point::new(newline_count, last_line_len));
 992
 993        // Verify first/last line chars
 994        if !text.is_empty() {
 995            let first_line = text.split('\n').next().unwrap();
 996            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
 997
 998            let last_line = text.split('\n').next_back().unwrap();
 999            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1000            assert_eq!(
1001                chunk.last_line_len_utf16(),
1002                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1003            );
1004        }
1005
1006        // Verify longest row
1007        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1008        let mut max_chars = 0;
1009        let mut current_row = 0;
1010        let mut current_chars = 0;
1011        let mut max_row = 0;
1012
1013        for c in text.chars() {
1014            if c == '\n' {
1015                if current_chars > max_chars {
1016                    max_chars = current_chars;
1017                    max_row = current_row;
1018                }
1019                current_row += 1;
1020                current_chars = 0;
1021            } else {
1022                current_chars += 1;
1023            }
1024        }
1025
1026        if current_chars > max_chars {
1027            max_chars = current_chars;
1028            max_row = current_row;
1029        }
1030
1031        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1032        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1033    }
1034}