chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35#[inline(always)]
  36const fn saturating_shl_mask(offset: u32) -> Bitmap {
  37    (1 as Bitmap).unbounded_shl(offset).wrapping_sub(1)
  38}
  39
  40#[inline(always)]
  41const fn saturating_shr_mask(offset: u32) -> Bitmap {
  42    !Bitmap::MAX.unbounded_shr(offset)
  43}
  44
  45impl Chunk {
  46    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  47
  48    #[inline(always)]
  49    pub fn new(text: &str) -> Self {
  50        let mut this = Chunk::default();
  51        this.push_str(text);
  52        this
  53    }
  54
  55    #[inline(always)]
  56    pub fn push_str(&mut self, text: &str) {
  57        for (char_ix, c) in text.char_indices() {
  58            let ix = self.text.len() + char_ix;
  59            self.chars |= 1 << ix;
  60            self.chars_utf16 |= 1 << ix;
  61            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  62            self.newlines |= ((c == '\n') as Bitmap) << ix;
  63            self.tabs |= ((c == '\t') as Bitmap) << ix;
  64        }
  65        self.text.push_str(text);
  66    }
  67
  68    #[inline(always)]
  69    pub fn append(&mut self, slice: ChunkSlice) {
  70        if slice.is_empty() {
  71            return;
  72        };
  73
  74        let base_ix = self.text.len();
  75        self.chars |= slice.chars << base_ix;
  76        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  77        self.newlines |= slice.newlines << base_ix;
  78        self.tabs |= slice.tabs << base_ix;
  79        self.text.push_str(slice.text);
  80    }
  81
  82    #[inline(always)]
  83    pub fn as_slice(&self) -> ChunkSlice<'_> {
  84        ChunkSlice {
  85            chars: self.chars,
  86            chars_utf16: self.chars_utf16,
  87            newlines: self.newlines,
  88            tabs: self.tabs,
  89            text: &self.text,
  90        }
  91    }
  92
  93    #[inline(always)]
  94    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  95        self.as_slice().slice(range)
  96    }
  97
  98    #[inline(always)]
  99    pub fn chars(&self) -> Bitmap {
 100        self.chars
 101    }
 102
 103    pub fn tabs(&self) -> Bitmap {
 104        self.tabs
 105    }
 106
 107    #[inline(always)]
 108    pub fn is_char_boundary(&self, offset: usize) -> bool {
 109        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 110    }
 111
 112    pub fn floor_char_boundary(&self, index: usize) -> usize {
 113        #[inline]
 114        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 115            // This is bit magic equivalent to: b < 128 || b >= 192
 116            (u8 as i8) >= -0x40
 117        }
 118
 119        if index >= self.text.len() {
 120            self.text.len()
 121        } else {
 122            let mut i = index;
 123            while i > 0 {
 124                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 125                    break;
 126                }
 127                i -= 1;
 128            }
 129
 130            i
 131        }
 132    }
 133
 134    #[track_caller]
 135    #[inline(always)]
 136    pub fn assert_char_boundary(&self, offset: usize) {
 137        if self.is_char_boundary(offset) {
 138            return;
 139        }
 140        panic_char_boundary(self, offset);
 141
 142        #[cold]
 143        #[inline(never)]
 144        #[track_caller]
 145        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
 146            if offset > chunk.text.len() {
 147                panic!(
 148                    "byte index {} is out of bounds of `{:?}` (length: {})",
 149                    offset,
 150                    chunk.text,
 151                    chunk.text.len()
 152                );
 153            }
 154            // find the character
 155            let char_start = chunk.floor_char_boundary(offset);
 156            // `char_start` must be less than len and a char boundary
 157            let ch = chunk
 158                .text
 159                .get(char_start..)
 160                .unwrap()
 161                .chars()
 162                .next()
 163                .unwrap();
 164            let char_range = char_start..char_start + ch.len_utf8();
 165            panic!(
 166                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 167                offset, ch, char_range,
 168            );
 169        }
 170    }
 171}
 172
 173#[derive(Clone, Copy, Debug)]
 174pub struct ChunkSlice<'a> {
 175    chars: Bitmap,
 176    chars_utf16: Bitmap,
 177    newlines: Bitmap,
 178    tabs: Bitmap,
 179    text: &'a str,
 180}
 181
 182impl Into<Chunk> for ChunkSlice<'_> {
 183    fn into(self) -> Chunk {
 184        Chunk {
 185            chars: self.chars,
 186            chars_utf16: self.chars_utf16,
 187            newlines: self.newlines,
 188            tabs: self.tabs,
 189            text: self.text.try_into().unwrap(),
 190        }
 191    }
 192}
 193
 194impl<'a> ChunkSlice<'a> {
 195    #[inline(always)]
 196    pub fn is_empty(&self) -> bool {
 197        self.text.is_empty()
 198    }
 199
 200    #[inline(always)]
 201    pub fn is_char_boundary(&self, offset: usize) -> bool {
 202        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 203    }
 204
 205    #[inline(always)]
 206    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 207        if mid == MAX_BASE {
 208            let left = self;
 209            let right = ChunkSlice {
 210                chars: 0,
 211                chars_utf16: 0,
 212                newlines: 0,
 213                tabs: 0,
 214                text: "",
 215            };
 216            (left, right)
 217        } else {
 218            let mask = ((1 as Bitmap) << mid) - 1;
 219            let (left_text, right_text) = self.text.split_at(mid);
 220            let left = ChunkSlice {
 221                chars: self.chars & mask,
 222                chars_utf16: self.chars_utf16 & mask,
 223                newlines: self.newlines & mask,
 224                tabs: self.tabs & mask,
 225                text: left_text,
 226            };
 227            let right = ChunkSlice {
 228                chars: self.chars >> mid,
 229                chars_utf16: self.chars_utf16 >> mid,
 230                newlines: self.newlines >> mid,
 231                tabs: self.tabs >> mid,
 232                text: right_text,
 233            };
 234            (left, right)
 235        }
 236    }
 237
 238    #[inline(always)]
 239    pub fn slice(self, range: Range<usize>) -> Self {
 240        let mask = (1 as Bitmap)
 241            .unbounded_shl(range.end as u32)
 242            .wrapping_sub(1);
 243        if range.start == MAX_BASE {
 244            Self {
 245                chars: 0,
 246                chars_utf16: 0,
 247                newlines: 0,
 248                tabs: 0,
 249                text: "",
 250            }
 251        } else {
 252            self.assert_char_boundary(range.start);
 253            self.assert_char_boundary(range.end);
 254            Self {
 255                chars: (self.chars & mask) >> range.start,
 256                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 257                newlines: (self.newlines & mask) >> range.start,
 258                tabs: (self.tabs & mask) >> range.start,
 259                text: &self.text[range],
 260            }
 261        }
 262    }
 263
 264    #[inline(always)]
 265    pub fn text_summary(&self) -> TextSummary {
 266        let mut chars = 0;
 267        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 268        TextSummary {
 269            len: self.len(),
 270            chars,
 271            len_utf16: self.len_utf16(),
 272            lines: self.lines(),
 273            first_line_chars: self.first_line_chars(),
 274            last_line_chars: self.last_line_chars(),
 275            last_line_len_utf16: self.last_line_len_utf16(),
 276            longest_row,
 277            longest_row_chars,
 278        }
 279    }
 280
 281    /// Get length in bytes
 282    #[inline(always)]
 283    pub fn len(&self) -> usize {
 284        self.text.len()
 285    }
 286
 287    /// Get length in UTF-16 code units
 288    #[inline(always)]
 289    pub fn len_utf16(&self) -> OffsetUtf16 {
 290        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 291    }
 292
 293    /// Get point representing number of lines and length of last line
 294    #[inline(always)]
 295    pub fn lines(&self) -> Point {
 296        let row = self.newlines.count_ones();
 297        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 298        Point::new(row, column)
 299    }
 300
 301    /// Get number of chars in first line
 302    #[inline(always)]
 303    pub fn first_line_chars(&self) -> u32 {
 304        (self.chars & saturating_shl_mask(self.newlines.trailing_zeros())).count_ones()
 305    }
 306
 307    /// Get number of chars in last line
 308    #[inline(always)]
 309    pub fn last_line_chars(&self) -> u32 {
 310        (self.chars & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 311    }
 312
 313    /// Get number of UTF-16 code units in last line
 314    #[inline(always)]
 315    pub fn last_line_len_utf16(&self) -> u32 {
 316        (self.chars_utf16 & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 317    }
 318
 319    /// Get the longest row in the chunk and its length in characters.
 320    /// Calculate the total number of characters in the chunk along the way.
 321    #[inline(always)]
 322    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 323        let mut chars = self.chars;
 324        let mut newlines = self.newlines;
 325        *total_chars = 0;
 326        let mut row = 0;
 327        let mut longest_row = 0;
 328        let mut longest_row_chars = 0;
 329        while newlines > 0 {
 330            let newline_ix = newlines.trailing_zeros();
 331            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 332            *total_chars += usize::from(row_chars);
 333            if row_chars > longest_row_chars {
 334                longest_row = row;
 335                longest_row_chars = row_chars;
 336            }
 337
 338            newlines >>= newline_ix;
 339            newlines >>= 1;
 340            chars >>= newline_ix;
 341            chars >>= 1;
 342            row += 1;
 343            *total_chars += 1;
 344        }
 345
 346        let row_chars = chars.count_ones() as u8;
 347        *total_chars += usize::from(row_chars);
 348        if row_chars > longest_row_chars {
 349            (row, row_chars as u32)
 350        } else {
 351            (longest_row, longest_row_chars as u32)
 352        }
 353    }
 354
 355    #[inline(always)]
 356    pub fn offset_to_point(&self, offset: usize) -> Point {
 357        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 358        let row = (self.newlines & mask).count_ones();
 359        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 360        let column = (offset - newline_ix as usize) as u32;
 361        Point::new(row, column)
 362    }
 363
 364    #[inline(always)]
 365    pub fn point_to_offset(&self, point: Point) -> usize {
 366        if point.row > self.lines().row {
 367            debug_panic!(
 368                "point {:?} extends beyond rows for string {:?}",
 369                point,
 370                self.text
 371            );
 372            return self.len();
 373        }
 374
 375        let row_offset_range = self.offset_range_for_row(point.row);
 376        if point.column > row_offset_range.len() as u32 {
 377            debug_panic!(
 378                "point {:?} extends beyond row for string {:?}",
 379                point,
 380                self.text
 381            );
 382            row_offset_range.end
 383        } else {
 384            row_offset_range.start + point.column as usize
 385        }
 386    }
 387
 388    #[track_caller]
 389    #[inline(always)]
 390    pub fn assert_char_boundary(&self, offset: usize) {
 391        if self.is_char_boundary(offset) {
 392            return;
 393        }
 394        panic_char_boundary(self, offset);
 395
 396        #[cold]
 397        #[inline(never)]
 398        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
 399            if offset > chunk.text.len() {
 400                panic!(
 401                    "byte index {} is out of bounds of `{:?}` (length: {})",
 402                    offset,
 403                    chunk.text,
 404                    chunk.text.len()
 405                );
 406            }
 407            // find the character
 408            let char_start = chunk.floor_char_boundary(offset);
 409            // `char_start` must be less than len and a char boundary
 410            let ch = chunk
 411                .text
 412                .get(char_start..)
 413                .unwrap()
 414                .chars()
 415                .next()
 416                .unwrap();
 417            let char_range = char_start..char_start + ch.len_utf8();
 418            panic!(
 419                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 420                offset, ch, char_range,
 421            );
 422        }
 423    }
 424
 425    pub fn floor_char_boundary(&self, index: usize) -> usize {
 426        #[inline]
 427        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 428            // This is bit magic equivalent to: b < 128 || b >= 192
 429            (u8 as i8) >= -0x40
 430        }
 431
 432        if index >= self.text.len() {
 433            self.text.len()
 434        } else {
 435            let mut i = index;
 436            while i > 0 {
 437                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 438                    break;
 439                }
 440                i -= 1;
 441            }
 442
 443            i
 444        }
 445    }
 446
 447    #[inline(always)]
 448    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 449        if point.row > self.lines().row {
 450            debug_panic!(
 451                "point {:?} extends beyond rows for string {:?}",
 452                point,
 453                self.text
 454            );
 455            return self.len_utf16();
 456        }
 457        self.offset_to_offset_utf16(self.point_to_offset(point))
 458    }
 459
 460    #[inline(always)]
 461    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 462        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 463        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 464    }
 465
 466    #[inline(always)]
 467    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 468        if target.0 == 0 {
 469            0
 470        } else {
 471            #[cfg(not(test))]
 472            let chars_utf16 = self.chars_utf16;
 473            #[cfg(test)]
 474            let chars_utf16 = self.chars_utf16 as u128;
 475            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 476            if ix == MAX_BASE {
 477                MAX_BASE
 478            } else {
 479                let utf8_additional_len = cmp::min(
 480                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 481                    self.text.len() - ix,
 482                );
 483                ix + utf8_additional_len
 484            }
 485        }
 486    }
 487
 488    #[inline(always)]
 489    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 490        let mask = saturating_shl_mask(offset as u32);
 491        let row = (self.newlines & saturating_shl_mask(offset as u32)).count_ones();
 492        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 493        let column = if newline_ix as usize == MAX_BASE {
 494            0
 495        } else {
 496            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 497        };
 498        PointUtf16::new(row, column)
 499    }
 500
 501    #[inline(always)]
 502    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 503        self.offset_to_point_utf16(self.point_to_offset(point))
 504    }
 505
 506    #[inline(always)]
 507    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 508        let lines = self.lines();
 509        if point.row > lines.row {
 510            if !clip {
 511                debug_panic!(
 512                    "point {:?} is beyond this chunk's extent {:?}",
 513                    point,
 514                    self.text
 515                );
 516            }
 517            return self.len();
 518        }
 519
 520        let row_offset_range = self.offset_range_for_row(point.row);
 521        let line = self.slice(row_offset_range.clone());
 522        if point.column > line.last_line_len_utf16() {
 523            if !clip {
 524                debug_panic!(
 525                    "point {:?} is beyond the end of the line in chunk {:?}",
 526                    point,
 527                    self.text
 528                );
 529            }
 530            return line.len();
 531        }
 532
 533        let mut offset = row_offset_range.start;
 534        if point.column > 0 {
 535            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 536            if !self.text.is_char_boundary(offset) {
 537                offset -= 1;
 538                while !self.text.is_char_boundary(offset) {
 539                    offset -= 1;
 540                }
 541                if !clip {
 542                    debug_panic!(
 543                        "point {:?} is within character in chunk {:?}",
 544                        point,
 545                        self.text,
 546                    );
 547                }
 548            }
 549        }
 550        offset
 551    }
 552
 553    #[inline(always)]
 554    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 555        let max_point = self.lines();
 556        if point.0.row > max_point.row {
 557            return max_point;
 558        }
 559
 560        let row_offset_range = self.offset_range_for_row(point.0.row);
 561        let line = self.slice(row_offset_range);
 562        if point.0.column == 0 {
 563            Point::new(point.0.row, 0)
 564        } else if point.0.column >= line.len_utf16().0 as u32 {
 565            Point::new(point.0.row, line.len() as u32)
 566        } else {
 567            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 568            while !line.text.is_char_boundary(column) {
 569                column -= 1;
 570            }
 571            Point::new(point.0.row, column as u32)
 572        }
 573    }
 574
 575    #[inline(always)]
 576    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 577        let max_point = self.lines();
 578        if point.row > max_point.row {
 579            return max_point;
 580        }
 581
 582        let line = self.slice(self.offset_range_for_row(point.row));
 583        if point.column == 0 {
 584            point
 585        } else if point.column >= line.len() as u32 {
 586            Point::new(point.row, line.len() as u32)
 587        } else {
 588            let mut column = point.column as usize;
 589            let bytes = line.text.as_bytes();
 590            if bytes[column - 1] < 128 && bytes[column] < 128 {
 591                return Point::new(point.row, column as u32);
 592            }
 593
 594            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 595            loop {
 596                if line.is_char_boundary(column)
 597                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 598                {
 599                    break;
 600                }
 601
 602                match bias {
 603                    Bias::Left => column -= 1,
 604                    Bias::Right => column += 1,
 605                }
 606                grapheme_cursor.set_cursor(column);
 607            }
 608            Point::new(point.row, column as u32)
 609        }
 610    }
 611
 612    #[inline(always)]
 613    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 614        let max_point = self.lines();
 615        if point.0.row > max_point.row {
 616            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 617        } else {
 618            let line = self.slice(self.offset_range_for_row(point.0.row));
 619            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 620            PointUtf16::new(point.0.row, column.0 as u32)
 621        }
 622    }
 623
 624    #[inline(always)]
 625    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 626        if target == OffsetUtf16::default() {
 627            OffsetUtf16::default()
 628        } else if target >= self.len_utf16() {
 629            self.len_utf16()
 630        } else {
 631            let mut offset = self.offset_utf16_to_offset(target);
 632            while !self.text.is_char_boundary(offset) {
 633                if bias == Bias::Left {
 634                    offset -= 1;
 635                } else {
 636                    offset += 1;
 637                }
 638            }
 639            self.offset_to_offset_utf16(offset)
 640        }
 641    }
 642
 643    #[inline(always)]
 644    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 645        let row_start = if row > 0 {
 646            #[cfg(not(test))]
 647            let newlines = self.newlines;
 648            #[cfg(test)]
 649            let newlines = self.newlines as u128;
 650            nth_set_bit(newlines, row as usize) + 1
 651        } else {
 652            0
 653        };
 654        let row_len = if row_start == MAX_BASE {
 655            0
 656        } else {
 657            cmp::min(
 658                (self.newlines >> row_start).trailing_zeros(),
 659                (self.text.len() - row_start) as u32,
 660            )
 661        };
 662        row_start..row_start + row_len as usize
 663    }
 664
 665    #[inline(always)]
 666    pub fn tabs(&self) -> Tabs {
 667        Tabs {
 668            tabs: self.tabs,
 669            chars: self.chars,
 670        }
 671    }
 672}
 673
 674pub struct Tabs {
 675    tabs: Bitmap,
 676    chars: Bitmap,
 677}
 678
 679#[derive(Debug, PartialEq, Eq)]
 680pub struct TabPosition {
 681    pub byte_offset: usize,
 682    pub char_offset: usize,
 683}
 684
 685impl Iterator for Tabs {
 686    type Item = TabPosition;
 687
 688    fn next(&mut self) -> Option<Self::Item> {
 689        if self.tabs == 0 {
 690            return None;
 691        }
 692
 693        let tab_offset = self.tabs.trailing_zeros() as usize;
 694        let chars_mask = (1 << tab_offset) - 1;
 695        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 696
 697        // Since tabs are 1 byte the tab offset is the same as the byte offset
 698        let position = TabPosition {
 699            byte_offset: tab_offset,
 700            char_offset,
 701        };
 702        // Remove the tab we've just seen
 703        self.tabs ^= 1 << tab_offset;
 704
 705        Some(position)
 706    }
 707}
 708
 709/// Finds the n-th bit that is set to 1.
 710#[inline(always)]
 711fn nth_set_bit(v: u128, n: usize) -> usize {
 712    let low = v as u64;
 713    let high = (v >> 64) as u64;
 714
 715    let low_count = low.count_ones() as usize;
 716    if n > low_count {
 717        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 718    } else {
 719        nth_set_bit_u64(low, n as u64) as usize
 720    }
 721}
 722
 723#[inline(always)]
 724fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 725    let v = v.reverse_bits();
 726    let mut s: u64 = 64;
 727
 728    // Parallel bit count intermediates
 729    let a = v - ((v >> 1) & (u64::MAX / 3));
 730    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 731    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 732    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 733
 734    // Branchless select
 735    let t = (d >> 32) + (d >> 48);
 736    s -= (t.wrapping_sub(n) & 256) >> 3;
 737    n -= t & (t.wrapping_sub(n) >> 8);
 738
 739    let t = (d >> (s - 16)) & 0xff;
 740    s -= (t.wrapping_sub(n) & 256) >> 4;
 741    n -= t & (t.wrapping_sub(n) >> 8);
 742
 743    let t = (c >> (s - 8)) & 0xf;
 744    s -= (t.wrapping_sub(n) & 256) >> 5;
 745    n -= t & (t.wrapping_sub(n) >> 8);
 746
 747    let t = (b >> (s - 4)) & 0x7;
 748    s -= (t.wrapping_sub(n) & 256) >> 6;
 749    n -= t & (t.wrapping_sub(n) >> 8);
 750
 751    let t = (a >> (s - 2)) & 0x3;
 752    s -= (t.wrapping_sub(n) & 256) >> 7;
 753    n -= t & (t.wrapping_sub(n) >> 8);
 754
 755    let t = (v >> (s - 1)) & 0x1;
 756    s -= (t.wrapping_sub(n) & 256) >> 8;
 757
 758    65 - s - 1
 759}
 760
 761#[cfg(test)]
 762mod tests {
 763    use super::*;
 764    use rand::prelude::*;
 765    use util::RandomCharIter;
 766
 767    #[gpui::test(iterations = 100)]
 768    fn test_random_chunks(mut rng: StdRng) {
 769        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 770        log::info!("Chunk: {:?}", text);
 771        let chunk = Chunk::new(&text);
 772        verify_chunk(chunk.as_slice(), &text);
 773
 774        // Verify Chunk::chars() bitmap
 775        let expected_chars = char_offsets(&text)
 776            .into_iter()
 777            .inspect(|i| assert!(*i < MAX_BASE))
 778            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 779        assert_eq!(chunk.chars(), expected_chars);
 780
 781        for _ in 0..10 {
 782            let mut start = rng.random_range(0..=chunk.text.len());
 783            let mut end = rng.random_range(start..=chunk.text.len());
 784            while !chunk.text.is_char_boundary(start) {
 785                start -= 1;
 786            }
 787            while !chunk.text.is_char_boundary(end) {
 788                end -= 1;
 789            }
 790            let range = start..end;
 791            log::info!("Range: {:?}", range);
 792            let text_slice = &text[range.clone()];
 793            let chunk_slice = chunk.slice(range);
 794            verify_chunk(chunk_slice, text_slice);
 795        }
 796    }
 797
 798    #[gpui::test(iterations = 100)]
 799    fn test_split_chunk_slice(mut rng: StdRng) {
 800        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 801        let chunk = Chunk::new(text);
 802        let offset = char_offsets_with_end(text)
 803            .into_iter()
 804            .choose(&mut rng)
 805            .unwrap();
 806        let (a, b) = chunk.as_slice().split_at(offset);
 807        let (a_str, b_str) = text.split_at(offset);
 808        verify_chunk(a, a_str);
 809        verify_chunk(b, b_str);
 810    }
 811
 812    #[gpui::test(iterations = 1000)]
 813    fn test_nth_set_bit_random(mut rng: StdRng) {
 814        let set_count = rng.random_range(0..=128);
 815        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 816        set_bits.sort();
 817        let mut n = 0;
 818        for ix in set_bits.iter().copied() {
 819            n |= 1 << ix;
 820        }
 821
 822        for (mut ix, position) in set_bits.into_iter().enumerate() {
 823            ix += 1;
 824            assert_eq!(
 825                nth_set_bit(n, ix),
 826                position,
 827                "nth_set_bit({:0128b}, {})",
 828                n,
 829                ix
 830            );
 831        }
 832    }
 833
 834    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 835    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 836        let mut str = String::new();
 837        let mut chars = RandomCharIter::new(rng);
 838        loop {
 839            let ch = chars.next().unwrap();
 840            if str.len() + ch.len_utf8() > len {
 841                break;
 842            }
 843            str.push(ch);
 844        }
 845        str
 846    }
 847
 848    #[gpui::test(iterations = 1000)]
 849    fn test_append_random_strings(mut rng: StdRng) {
 850        let len1 = rng.random_range(0..=MAX_BASE);
 851        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 852        let str1 = random_string_with_utf8_len(&mut rng, len1);
 853        let str2 = random_string_with_utf8_len(&mut rng, len2);
 854        let mut chunk1 = Chunk::new(&str1);
 855        let chunk2 = Chunk::new(&str2);
 856        let char_offsets = char_offsets_with_end(&str2);
 857        let start_index = rng.random_range(0..char_offsets.len());
 858        let start_offset = char_offsets[start_index];
 859        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 860        chunk1.append(chunk2.slice(start_offset..end_offset));
 861        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 862    }
 863
 864    /// Return the byte offsets for each character in a string.
 865    ///
 866    /// These are valid offsets to split the string.
 867    fn char_offsets(text: &str) -> Vec<usize> {
 868        text.char_indices().map(|(i, _c)| i).collect()
 869    }
 870
 871    /// Return the byte offsets for each character in a string, plus the offset
 872    /// past the end of the string.
 873    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 874        let mut v = char_offsets(text);
 875        v.push(text.len());
 876        v
 877    }
 878
 879    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 880        let mut offset = 0;
 881        let mut offset_utf16 = OffsetUtf16(0);
 882        let mut point = Point::zero();
 883        let mut point_utf16 = PointUtf16::zero();
 884
 885        log::info!("Verifying chunk {:?}", text);
 886        assert_eq!(chunk.offset_to_point(0), Point::zero());
 887
 888        let mut expected_tab_positions = Vec::new();
 889
 890        for (char_offset, c) in text.chars().enumerate() {
 891            let expected_point = chunk.offset_to_point(offset);
 892            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 893            assert_eq!(
 894                chunk.point_to_offset(point),
 895                offset,
 896                "mismatch at point {:?}",
 897                point
 898            );
 899            assert_eq!(
 900                chunk.offset_to_offset_utf16(offset),
 901                offset_utf16,
 902                "mismatch at offset {}",
 903                offset
 904            );
 905            assert_eq!(
 906                chunk.offset_utf16_to_offset(offset_utf16),
 907                offset,
 908                "mismatch at offset_utf16 {:?}",
 909                offset_utf16
 910            );
 911            assert_eq!(
 912                chunk.point_to_point_utf16(point),
 913                point_utf16,
 914                "mismatch at point {:?}",
 915                point
 916            );
 917            assert_eq!(
 918                chunk.point_utf16_to_offset(point_utf16, false),
 919                offset,
 920                "mismatch at point_utf16 {:?}",
 921                point_utf16
 922            );
 923            assert_eq!(
 924                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 925                point,
 926                "mismatch for unclipped_point_utf16_to_point at {:?}",
 927                point_utf16
 928            );
 929
 930            assert_eq!(
 931                chunk.clip_point(point, Bias::Left),
 932                point,
 933                "incorrect left clip at {:?}",
 934                point
 935            );
 936            assert_eq!(
 937                chunk.clip_point(point, Bias::Right),
 938                point,
 939                "incorrect right clip at {:?}",
 940                point
 941            );
 942
 943            for i in 1..c.len_utf8() {
 944                let test_point = Point::new(point.row, point.column + i as u32);
 945                assert_eq!(
 946                    chunk.clip_point(test_point, Bias::Left),
 947                    point,
 948                    "incorrect left clip within multi-byte char at {:?}",
 949                    test_point
 950                );
 951                assert_eq!(
 952                    chunk.clip_point(test_point, Bias::Right),
 953                    Point::new(point.row, point.column + c.len_utf8() as u32),
 954                    "incorrect right clip within multi-byte char at {:?}",
 955                    test_point
 956                );
 957            }
 958
 959            for i in 1..c.len_utf16() {
 960                let test_point = Unclipped(PointUtf16::new(
 961                    point_utf16.row,
 962                    point_utf16.column + i as u32,
 963                ));
 964                assert_eq!(
 965                    chunk.unclipped_point_utf16_to_point(test_point),
 966                    point,
 967                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 968                    test_point
 969                );
 970                assert_eq!(
 971                    chunk.clip_point_utf16(test_point, Bias::Left),
 972                    point_utf16,
 973                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 974                    test_point
 975                );
 976                assert_eq!(
 977                    chunk.clip_point_utf16(test_point, Bias::Right),
 978                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 979                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 980                    test_point
 981                );
 982
 983                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 984                assert_eq!(
 985                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 986                    offset_utf16,
 987                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 988                    test_offset
 989                );
 990                assert_eq!(
 991                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 992                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 993                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 994                    test_offset
 995                );
 996            }
 997
 998            if c == '\n' {
 999                point.row += 1;
1000                point.column = 0;
1001                point_utf16.row += 1;
1002                point_utf16.column = 0;
1003            } else {
1004                point.column += c.len_utf8() as u32;
1005                point_utf16.column += c.len_utf16() as u32;
1006            }
1007
1008            if c == '\t' {
1009                expected_tab_positions.push(TabPosition {
1010                    byte_offset: offset,
1011                    char_offset,
1012                });
1013            }
1014
1015            offset += c.len_utf8();
1016            offset_utf16.0 += c.len_utf16();
1017        }
1018
1019        let final_point = chunk.offset_to_point(offset);
1020        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
1021        assert_eq!(
1022            chunk.point_to_offset(point),
1023            offset,
1024            "mismatch at point {:?}",
1025            point
1026        );
1027        assert_eq!(
1028            chunk.offset_to_offset_utf16(offset),
1029            offset_utf16,
1030            "mismatch at offset {}",
1031            offset
1032        );
1033        assert_eq!(
1034            chunk.offset_utf16_to_offset(offset_utf16),
1035            offset,
1036            "mismatch at offset_utf16 {:?}",
1037            offset_utf16
1038        );
1039        assert_eq!(
1040            chunk.point_to_point_utf16(point),
1041            point_utf16,
1042            "mismatch at final point {:?}",
1043            point
1044        );
1045        assert_eq!(
1046            chunk.point_utf16_to_offset(point_utf16, false),
1047            offset,
1048            "mismatch at final point_utf16 {:?}",
1049            point_utf16
1050        );
1051        assert_eq!(
1052            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
1053            point,
1054            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
1055            point_utf16
1056        );
1057        assert_eq!(
1058            chunk.clip_point(point, Bias::Left),
1059            point,
1060            "incorrect left clip at final point {:?}",
1061            point
1062        );
1063        assert_eq!(
1064            chunk.clip_point(point, Bias::Right),
1065            point,
1066            "incorrect right clip at final point {:?}",
1067            point
1068        );
1069        assert_eq!(
1070            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1071            point_utf16,
1072            "incorrect left clip_point_utf16 at final point {:?}",
1073            point_utf16
1074        );
1075        assert_eq!(
1076            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1077            point_utf16,
1078            "incorrect right clip_point_utf16 at final point {:?}",
1079            point_utf16
1080        );
1081        assert_eq!(
1082            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1083            offset_utf16,
1084            "incorrect left clip_offset_utf16 at final offset {:?}",
1085            offset_utf16
1086        );
1087        assert_eq!(
1088            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1089            offset_utf16,
1090            "incorrect right clip_offset_utf16 at final offset {:?}",
1091            offset_utf16
1092        );
1093
1094        // Verify length methods
1095        assert_eq!(chunk.len(), text.len());
1096        assert_eq!(
1097            chunk.len_utf16().0,
1098            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1099        );
1100
1101        // Verify line counting
1102        let lines = chunk.lines();
1103        let mut newline_count = 0;
1104        let mut last_line_len = 0;
1105        for c in text.chars() {
1106            if c == '\n' {
1107                newline_count += 1;
1108                last_line_len = 0;
1109            } else {
1110                last_line_len += c.len_utf8() as u32;
1111            }
1112        }
1113        assert_eq!(lines, Point::new(newline_count, last_line_len));
1114
1115        // Verify first/last line chars
1116        if !text.is_empty() {
1117            let first_line = text.split('\n').next().unwrap();
1118            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1119
1120            let last_line = text.split('\n').next_back().unwrap();
1121            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1122            assert_eq!(
1123                chunk.last_line_len_utf16(),
1124                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1125            );
1126        }
1127
1128        // Verify longest row
1129        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1130        let mut max_chars = 0;
1131        let mut current_row = 0;
1132        let mut current_chars = 0;
1133        let mut max_row = 0;
1134
1135        for c in text.chars() {
1136            if c == '\n' {
1137                if current_chars > max_chars {
1138                    max_chars = current_chars;
1139                    max_row = current_row;
1140                }
1141                current_row += 1;
1142                current_chars = 0;
1143            } else {
1144                current_chars += 1;
1145            }
1146        }
1147
1148        if current_chars > max_chars {
1149            max_chars = current_chars;
1150            max_row = current_row;
1151        }
1152
1153        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1154        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1155    }
1156}