chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35impl Chunk {
  36    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  37
  38    #[inline(always)]
  39    pub fn new(text: &str) -> Self {
  40        let mut this = Chunk::default();
  41        this.push_str(text);
  42        this
  43    }
  44
  45    #[inline(always)]
  46    pub fn push_str(&mut self, text: &str) {
  47        for (char_ix, c) in text.char_indices() {
  48            let ix = self.text.len() + char_ix;
  49            self.chars |= 1 << ix;
  50            self.chars_utf16 |= 1 << ix;
  51            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  52            self.newlines |= ((c == '\n') as Bitmap) << ix;
  53            self.tabs |= ((c == '\t') as Bitmap) << ix;
  54        }
  55        self.text.push_str(text);
  56    }
  57
  58    #[inline(always)]
  59    pub fn append(&mut self, slice: ChunkSlice) {
  60        if slice.is_empty() {
  61            return;
  62        };
  63
  64        let base_ix = self.text.len();
  65        self.chars |= slice.chars << base_ix;
  66        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  67        self.newlines |= slice.newlines << base_ix;
  68        self.tabs |= slice.tabs << base_ix;
  69        self.text.push_str(slice.text);
  70    }
  71
  72    #[inline(always)]
  73    pub fn as_slice(&self) -> ChunkSlice<'_> {
  74        ChunkSlice {
  75            chars: self.chars,
  76            chars_utf16: self.chars_utf16,
  77            newlines: self.newlines,
  78            tabs: self.tabs,
  79            text: &self.text,
  80        }
  81    }
  82
  83    #[inline(always)]
  84    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  85        self.as_slice().slice(range)
  86    }
  87
  88    #[inline(always)]
  89    pub fn chars(&self) -> Bitmap {
  90        self.chars
  91    }
  92
  93    pub fn tabs(&self) -> Bitmap {
  94        self.tabs
  95    }
  96
  97    #[inline(always)]
  98    pub fn is_char_boundary(&self, offset: usize) -> bool {
  99        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 100    }
 101
 102    pub fn floor_char_boundary(&self, index: usize) -> usize {
 103        #[inline]
 104        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 105            // This is bit magic equivalent to: b < 128 || b >= 192
 106            (u8 as i8) >= -0x40
 107        }
 108
 109        if index >= self.text.len() {
 110            self.text.len()
 111        } else {
 112            let mut i = index;
 113            while i > 0 {
 114                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 115                    break;
 116                }
 117                i -= 1;
 118            }
 119
 120            i
 121        }
 122    }
 123
 124    #[track_caller]
 125    #[inline(always)]
 126    pub fn assert_char_boundary(&self, offset: usize) {
 127        if self.is_char_boundary(offset) {
 128            return;
 129        }
 130        panic_char_boundary(self, offset);
 131
 132        #[cold]
 133        #[inline(never)]
 134        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
 135            if offset > chunk.text.len() {
 136                panic!(
 137                    "byte index {} is out of bounds of `{:?}` (length: {})",
 138                    offset,
 139                    chunk.text,
 140                    chunk.text.len()
 141                );
 142            }
 143            // find the character
 144            let char_start = chunk.floor_char_boundary(offset);
 145            // `char_start` must be less than len and a char boundary
 146            let ch = chunk
 147                .text
 148                .get(char_start..)
 149                .unwrap()
 150                .chars()
 151                .next()
 152                .unwrap();
 153            let char_range = char_start..char_start + ch.len_utf8();
 154            panic!(
 155                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 156                offset, ch, char_range,
 157            );
 158        }
 159    }
 160}
 161
 162#[derive(Clone, Copy, Debug)]
 163pub struct ChunkSlice<'a> {
 164    chars: Bitmap,
 165    chars_utf16: Bitmap,
 166    newlines: Bitmap,
 167    tabs: Bitmap,
 168    text: &'a str,
 169}
 170
 171impl Into<Chunk> for ChunkSlice<'_> {
 172    fn into(self) -> Chunk {
 173        Chunk {
 174            chars: self.chars,
 175            chars_utf16: self.chars_utf16,
 176            newlines: self.newlines,
 177            tabs: self.tabs,
 178            text: self.text.try_into().unwrap(),
 179        }
 180    }
 181}
 182
 183impl<'a> ChunkSlice<'a> {
 184    #[inline(always)]
 185    pub fn is_empty(&self) -> bool {
 186        self.text.is_empty()
 187    }
 188
 189    #[inline(always)]
 190    pub fn is_char_boundary(&self, offset: usize) -> bool {
 191        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 192    }
 193
 194    #[inline(always)]
 195    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 196        if mid == MAX_BASE {
 197            let left = self;
 198            let right = ChunkSlice {
 199                chars: 0,
 200                chars_utf16: 0,
 201                newlines: 0,
 202                tabs: 0,
 203                text: "",
 204            };
 205            (left, right)
 206        } else {
 207            let mask = ((1 as Bitmap) << mid) - 1;
 208            let (left_text, right_text) = self.text.split_at(mid);
 209            let left = ChunkSlice {
 210                chars: self.chars & mask,
 211                chars_utf16: self.chars_utf16 & mask,
 212                newlines: self.newlines & mask,
 213                tabs: self.tabs & mask,
 214                text: left_text,
 215            };
 216            let right = ChunkSlice {
 217                chars: self.chars >> mid,
 218                chars_utf16: self.chars_utf16 >> mid,
 219                newlines: self.newlines >> mid,
 220                tabs: self.tabs >> mid,
 221                text: right_text,
 222            };
 223            (left, right)
 224        }
 225    }
 226
 227    #[inline(always)]
 228    pub fn slice(self, range: Range<usize>) -> Self {
 229        let mask = (1 as Bitmap)
 230            .unbounded_shl(range.end as u32)
 231            .wrapping_sub(1);
 232        if range.start == MAX_BASE {
 233            Self {
 234                chars: 0,
 235                chars_utf16: 0,
 236                newlines: 0,
 237                tabs: 0,
 238                text: "",
 239            }
 240        } else {
 241            self.assert_char_boundary(range.start);
 242            self.assert_char_boundary(range.end);
 243            Self {
 244                chars: (self.chars & mask) >> range.start,
 245                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 246                newlines: (self.newlines & mask) >> range.start,
 247                tabs: (self.tabs & mask) >> range.start,
 248                text: &self.text[range],
 249            }
 250        }
 251    }
 252
 253    #[inline(always)]
 254    pub fn text_summary(&self) -> TextSummary {
 255        let mut chars = 0;
 256        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 257        TextSummary {
 258            len: self.len(),
 259            chars,
 260            len_utf16: self.len_utf16(),
 261            lines: self.lines(),
 262            first_line_chars: self.first_line_chars(),
 263            last_line_chars: self.last_line_chars(),
 264            last_line_len_utf16: self.last_line_len_utf16(),
 265            longest_row,
 266            longest_row_chars,
 267        }
 268    }
 269
 270    /// Get length in bytes
 271    #[inline(always)]
 272    pub fn len(&self) -> usize {
 273        self.text.len()
 274    }
 275
 276    /// Get length in UTF-16 code units
 277    #[inline(always)]
 278    pub fn len_utf16(&self) -> OffsetUtf16 {
 279        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 280    }
 281
 282    /// Get point representing number of lines and length of last line
 283    #[inline(always)]
 284    pub fn lines(&self) -> Point {
 285        let row = self.newlines.count_ones();
 286        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 287        Point::new(row, column)
 288    }
 289
 290    /// Get number of chars in first line
 291    #[inline(always)]
 292    pub fn first_line_chars(&self) -> u32 {
 293        if self.newlines == 0 {
 294            self.chars.count_ones()
 295        } else {
 296            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
 297            (self.chars & mask).count_ones()
 298        }
 299    }
 300
 301    /// Get number of chars in last line
 302    #[inline(always)]
 303    pub fn last_line_chars(&self) -> u32 {
 304        if self.newlines == 0 {
 305            self.chars.count_ones()
 306        } else {
 307            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 308            (self.chars & mask).count_ones()
 309        }
 310    }
 311
 312    /// Get number of UTF-16 code units in last line
 313    #[inline(always)]
 314    pub fn last_line_len_utf16(&self) -> u32 {
 315        if self.newlines == 0 {
 316            self.chars_utf16.count_ones()
 317        } else {
 318            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 319            (self.chars_utf16 & mask).count_ones()
 320        }
 321    }
 322
 323    /// Get the longest row in the chunk and its length in characters.
 324    /// Calculate the total number of characters in the chunk along the way.
 325    #[inline(always)]
 326    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 327        let mut chars = self.chars;
 328        let mut newlines = self.newlines;
 329        *total_chars = 0;
 330        let mut row = 0;
 331        let mut longest_row = 0;
 332        let mut longest_row_chars = 0;
 333        while newlines > 0 {
 334            let newline_ix = newlines.trailing_zeros();
 335            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 336            *total_chars += usize::from(row_chars);
 337            if row_chars > longest_row_chars {
 338                longest_row = row;
 339                longest_row_chars = row_chars;
 340            }
 341
 342            newlines >>= newline_ix;
 343            newlines >>= 1;
 344            chars >>= newline_ix;
 345            chars >>= 1;
 346            row += 1;
 347            *total_chars += 1;
 348        }
 349
 350        let row_chars = chars.count_ones() as u8;
 351        *total_chars += usize::from(row_chars);
 352        if row_chars > longest_row_chars {
 353            (row, row_chars as u32)
 354        } else {
 355            (longest_row, longest_row_chars as u32)
 356        }
 357    }
 358
 359    #[inline(always)]
 360    pub fn offset_to_point(&self, offset: usize) -> Point {
 361        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 362        let row = (self.newlines & mask).count_ones();
 363        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 364        let column = (offset - newline_ix as usize) as u32;
 365        Point::new(row, column)
 366    }
 367
 368    #[inline(always)]
 369    pub fn point_to_offset(&self, point: Point) -> usize {
 370        if point.row > self.lines().row {
 371            debug_panic!(
 372                "point {:?} extends beyond rows for string {:?}",
 373                point,
 374                self.text
 375            );
 376            return self.len();
 377        }
 378
 379        let row_offset_range = self.offset_range_for_row(point.row);
 380        if point.column > row_offset_range.len() as u32 {
 381            debug_panic!(
 382                "point {:?} extends beyond row for string {:?}",
 383                point,
 384                self.text
 385            );
 386            row_offset_range.end
 387        } else {
 388            row_offset_range.start + point.column as usize
 389        }
 390    }
 391
 392    #[track_caller]
 393    #[inline(always)]
 394    pub fn assert_char_boundary(&self, offset: usize) {
 395        if self.is_char_boundary(offset) {
 396            return;
 397        }
 398        panic_char_boundary(self, offset);
 399
 400        #[cold]
 401        #[inline(never)]
 402        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
 403            if offset > chunk.text.len() {
 404                panic!(
 405                    "byte index {} is out of bounds of `{:?}` (length: {})",
 406                    offset,
 407                    chunk.text,
 408                    chunk.text.len()
 409                );
 410            }
 411            // find the character
 412            let char_start = chunk.floor_char_boundary(offset);
 413            // `char_start` must be less than len and a char boundary
 414            let ch = chunk
 415                .text
 416                .get(char_start..)
 417                .unwrap()
 418                .chars()
 419                .next()
 420                .unwrap();
 421            let char_range = char_start..char_start + ch.len_utf8();
 422            panic!(
 423                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 424                offset, ch, char_range,
 425            );
 426        }
 427    }
 428
 429    pub fn floor_char_boundary(&self, index: usize) -> usize {
 430        #[inline]
 431        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 432            // This is bit magic equivalent to: b < 128 || b >= 192
 433            (u8 as i8) >= -0x40
 434        }
 435
 436        if index >= self.text.len() {
 437            self.text.len()
 438        } else {
 439            let mut i = index;
 440            while i > 0 {
 441                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 442                    break;
 443                }
 444                i -= 1;
 445            }
 446
 447            i
 448        }
 449    }
 450
 451    #[inline(always)]
 452    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 453        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 454        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 455    }
 456
 457    #[inline(always)]
 458    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 459        if target.0 == 0 {
 460            0
 461        } else {
 462            #[cfg(not(test))]
 463            let chars_utf16 = self.chars_utf16;
 464            #[cfg(test)]
 465            let chars_utf16 = self.chars_utf16 as u128;
 466            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 467            if ix == MAX_BASE {
 468                MAX_BASE
 469            } else {
 470                let utf8_additional_len = cmp::min(
 471                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 472                    self.text.len() - ix,
 473                );
 474                ix + utf8_additional_len
 475            }
 476        }
 477    }
 478
 479    #[inline(always)]
 480    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 481        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 482        let row = (self.newlines & mask).count_ones();
 483        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 484        let column = if newline_ix as usize == MAX_BASE {
 485            0
 486        } else {
 487            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 488        };
 489        PointUtf16::new(row, column)
 490    }
 491
 492    #[inline(always)]
 493    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 494        self.offset_to_point_utf16(self.point_to_offset(point))
 495    }
 496
 497    #[inline(always)]
 498    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 499        let lines = self.lines();
 500        if point.row > lines.row {
 501            if !clip {
 502                debug_panic!(
 503                    "point {:?} is beyond this chunk's extent {:?}",
 504                    point,
 505                    self.text
 506                );
 507            }
 508            return self.len();
 509        }
 510
 511        let row_offset_range = self.offset_range_for_row(point.row);
 512        let line = self.slice(row_offset_range.clone());
 513        if point.column > line.last_line_len_utf16() {
 514            if !clip {
 515                debug_panic!(
 516                    "point {:?} is beyond the end of the line in chunk {:?}",
 517                    point,
 518                    self.text
 519                );
 520            }
 521            return line.len();
 522        }
 523
 524        let mut offset = row_offset_range.start;
 525        if point.column > 0 {
 526            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 527            if !self.text.is_char_boundary(offset) {
 528                offset -= 1;
 529                while !self.text.is_char_boundary(offset) {
 530                    offset -= 1;
 531                }
 532                if !clip {
 533                    debug_panic!(
 534                        "point {:?} is within character in chunk {:?}",
 535                        point,
 536                        self.text,
 537                    );
 538                }
 539            }
 540        }
 541        offset
 542    }
 543
 544    #[inline(always)]
 545    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 546        let max_point = self.lines();
 547        if point.0.row > max_point.row {
 548            return max_point;
 549        }
 550
 551        let row_offset_range = self.offset_range_for_row(point.0.row);
 552        let line = self.slice(row_offset_range);
 553        if point.0.column == 0 {
 554            Point::new(point.0.row, 0)
 555        } else if point.0.column >= line.len_utf16().0 as u32 {
 556            Point::new(point.0.row, line.len() as u32)
 557        } else {
 558            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 559            while !line.text.is_char_boundary(column) {
 560                column -= 1;
 561            }
 562            Point::new(point.0.row, column as u32)
 563        }
 564    }
 565
 566    #[inline(always)]
 567    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 568        let max_point = self.lines();
 569        if point.row > max_point.row {
 570            return max_point;
 571        }
 572
 573        let line = self.slice(self.offset_range_for_row(point.row));
 574        if point.column == 0 {
 575            point
 576        } else if point.column >= line.len() as u32 {
 577            Point::new(point.row, line.len() as u32)
 578        } else {
 579            let mut column = point.column as usize;
 580            let bytes = line.text.as_bytes();
 581            if bytes[column - 1] < 128 && bytes[column] < 128 {
 582                return Point::new(point.row, column as u32);
 583            }
 584
 585            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 586            loop {
 587                if line.is_char_boundary(column)
 588                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 589                {
 590                    break;
 591                }
 592
 593                match bias {
 594                    Bias::Left => column -= 1,
 595                    Bias::Right => column += 1,
 596                }
 597                grapheme_cursor.set_cursor(column);
 598            }
 599            Point::new(point.row, column as u32)
 600        }
 601    }
 602
 603    #[inline(always)]
 604    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 605        let max_point = self.lines();
 606        if point.0.row > max_point.row {
 607            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 608        } else {
 609            let line = self.slice(self.offset_range_for_row(point.0.row));
 610            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 611            PointUtf16::new(point.0.row, column.0 as u32)
 612        }
 613    }
 614
 615    #[inline(always)]
 616    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 617        if target == OffsetUtf16::default() {
 618            OffsetUtf16::default()
 619        } else if target >= self.len_utf16() {
 620            self.len_utf16()
 621        } else {
 622            let mut offset = self.offset_utf16_to_offset(target);
 623            while !self.text.is_char_boundary(offset) {
 624                if bias == Bias::Left {
 625                    offset -= 1;
 626                } else {
 627                    offset += 1;
 628                }
 629            }
 630            self.offset_to_offset_utf16(offset)
 631        }
 632    }
 633
 634    #[inline(always)]
 635    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 636        let row_start = if row > 0 {
 637            #[cfg(not(test))]
 638            let newlines = self.newlines;
 639            #[cfg(test)]
 640            let newlines = self.newlines as u128;
 641            nth_set_bit(newlines, row as usize) + 1
 642        } else {
 643            0
 644        };
 645        let row_len = if row_start == MAX_BASE {
 646            0
 647        } else {
 648            cmp::min(
 649                (self.newlines >> row_start).trailing_zeros(),
 650                (self.text.len() - row_start) as u32,
 651            )
 652        };
 653        row_start..row_start + row_len as usize
 654    }
 655
 656    #[inline(always)]
 657    pub fn tabs(&self) -> Tabs {
 658        Tabs {
 659            tabs: self.tabs,
 660            chars: self.chars,
 661        }
 662    }
 663}
 664
 665pub struct Tabs {
 666    tabs: Bitmap,
 667    chars: Bitmap,
 668}
 669
 670#[derive(Debug, PartialEq, Eq)]
 671pub struct TabPosition {
 672    pub byte_offset: usize,
 673    pub char_offset: usize,
 674}
 675
 676impl Iterator for Tabs {
 677    type Item = TabPosition;
 678
 679    fn next(&mut self) -> Option<Self::Item> {
 680        if self.tabs == 0 {
 681            return None;
 682        }
 683
 684        let tab_offset = self.tabs.trailing_zeros() as usize;
 685        let chars_mask = (1 << tab_offset) - 1;
 686        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 687
 688        // Since tabs are 1 byte the tab offset is the same as the byte offset
 689        let position = TabPosition {
 690            byte_offset: tab_offset,
 691            char_offset,
 692        };
 693        // Remove the tab we've just seen
 694        self.tabs ^= 1 << tab_offset;
 695
 696        Some(position)
 697    }
 698}
 699
 700/// Finds the n-th bit that is set to 1.
 701#[inline(always)]
 702fn nth_set_bit(v: u128, n: usize) -> usize {
 703    let low = v as u64;
 704    let high = (v >> 64) as u64;
 705
 706    let low_count = low.count_ones() as usize;
 707    if n > low_count {
 708        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 709    } else {
 710        nth_set_bit_u64(low, n as u64) as usize
 711    }
 712}
 713
 714#[inline(always)]
 715fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 716    let v = v.reverse_bits();
 717    let mut s: u64 = 64;
 718
 719    // Parallel bit count intermediates
 720    let a = v - ((v >> 1) & (u64::MAX / 3));
 721    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 722    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 723    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 724
 725    // Branchless select
 726    let t = (d >> 32) + (d >> 48);
 727    s -= (t.wrapping_sub(n) & 256) >> 3;
 728    n -= t & (t.wrapping_sub(n) >> 8);
 729
 730    let t = (d >> (s - 16)) & 0xff;
 731    s -= (t.wrapping_sub(n) & 256) >> 4;
 732    n -= t & (t.wrapping_sub(n) >> 8);
 733
 734    let t = (c >> (s - 8)) & 0xf;
 735    s -= (t.wrapping_sub(n) & 256) >> 5;
 736    n -= t & (t.wrapping_sub(n) >> 8);
 737
 738    let t = (b >> (s - 4)) & 0x7;
 739    s -= (t.wrapping_sub(n) & 256) >> 6;
 740    n -= t & (t.wrapping_sub(n) >> 8);
 741
 742    let t = (a >> (s - 2)) & 0x3;
 743    s -= (t.wrapping_sub(n) & 256) >> 7;
 744    n -= t & (t.wrapping_sub(n) >> 8);
 745
 746    let t = (v >> (s - 1)) & 0x1;
 747    s -= (t.wrapping_sub(n) & 256) >> 8;
 748
 749    65 - s - 1
 750}
 751
 752#[cfg(test)]
 753mod tests {
 754    use super::*;
 755    use rand::prelude::*;
 756    use util::RandomCharIter;
 757
 758    #[gpui::test(iterations = 100)]
 759    fn test_random_chunks(mut rng: StdRng) {
 760        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 761        log::info!("Chunk: {:?}", text);
 762        let chunk = Chunk::new(&text);
 763        verify_chunk(chunk.as_slice(), &text);
 764
 765        // Verify Chunk::chars() bitmap
 766        let expected_chars = char_offsets(&text)
 767            .into_iter()
 768            .inspect(|i| assert!(*i < MAX_BASE))
 769            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 770        assert_eq!(chunk.chars(), expected_chars);
 771
 772        for _ in 0..10 {
 773            let mut start = rng.random_range(0..=chunk.text.len());
 774            let mut end = rng.random_range(start..=chunk.text.len());
 775            while !chunk.text.is_char_boundary(start) {
 776                start -= 1;
 777            }
 778            while !chunk.text.is_char_boundary(end) {
 779                end -= 1;
 780            }
 781            let range = start..end;
 782            log::info!("Range: {:?}", range);
 783            let text_slice = &text[range.clone()];
 784            let chunk_slice = chunk.slice(range);
 785            verify_chunk(chunk_slice, text_slice);
 786        }
 787    }
 788
 789    #[gpui::test(iterations = 100)]
 790    fn test_split_chunk_slice(mut rng: StdRng) {
 791        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 792        let chunk = Chunk::new(text);
 793        let offset = char_offsets_with_end(text)
 794            .into_iter()
 795            .choose(&mut rng)
 796            .unwrap();
 797        let (a, b) = chunk.as_slice().split_at(offset);
 798        let (a_str, b_str) = text.split_at(offset);
 799        verify_chunk(a, a_str);
 800        verify_chunk(b, b_str);
 801    }
 802
 803    #[gpui::test(iterations = 1000)]
 804    fn test_nth_set_bit_random(mut rng: StdRng) {
 805        let set_count = rng.random_range(0..=128);
 806        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 807        set_bits.sort();
 808        let mut n = 0;
 809        for ix in set_bits.iter().copied() {
 810            n |= 1 << ix;
 811        }
 812
 813        for (mut ix, position) in set_bits.into_iter().enumerate() {
 814            ix += 1;
 815            assert_eq!(
 816                nth_set_bit(n, ix),
 817                position,
 818                "nth_set_bit({:0128b}, {})",
 819                n,
 820                ix
 821            );
 822        }
 823    }
 824
 825    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 826    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 827        let mut str = String::new();
 828        let mut chars = RandomCharIter::new(rng);
 829        loop {
 830            let ch = chars.next().unwrap();
 831            if str.len() + ch.len_utf8() > len {
 832                break;
 833            }
 834            str.push(ch);
 835        }
 836        str
 837    }
 838
 839    #[gpui::test(iterations = 1000)]
 840    fn test_append_random_strings(mut rng: StdRng) {
 841        let len1 = rng.random_range(0..=MAX_BASE);
 842        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 843        let str1 = random_string_with_utf8_len(&mut rng, len1);
 844        let str2 = random_string_with_utf8_len(&mut rng, len2);
 845        let mut chunk1 = Chunk::new(&str1);
 846        let chunk2 = Chunk::new(&str2);
 847        let char_offsets = char_offsets_with_end(&str2);
 848        let start_index = rng.random_range(0..char_offsets.len());
 849        let start_offset = char_offsets[start_index];
 850        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 851        chunk1.append(chunk2.slice(start_offset..end_offset));
 852        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 853    }
 854
 855    /// Return the byte offsets for each character in a string.
 856    ///
 857    /// These are valid offsets to split the string.
 858    fn char_offsets(text: &str) -> Vec<usize> {
 859        text.char_indices().map(|(i, _c)| i).collect()
 860    }
 861
 862    /// Return the byte offsets for each character in a string, plus the offset
 863    /// past the end of the string.
 864    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 865        let mut v = char_offsets(text);
 866        v.push(text.len());
 867        v
 868    }
 869
 870    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 871        let mut offset = 0;
 872        let mut offset_utf16 = OffsetUtf16(0);
 873        let mut point = Point::zero();
 874        let mut point_utf16 = PointUtf16::zero();
 875
 876        log::info!("Verifying chunk {:?}", text);
 877        assert_eq!(chunk.offset_to_point(0), Point::zero());
 878
 879        let mut expected_tab_positions = Vec::new();
 880
 881        for (char_offset, c) in text.chars().enumerate() {
 882            let expected_point = chunk.offset_to_point(offset);
 883            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 884            assert_eq!(
 885                chunk.point_to_offset(point),
 886                offset,
 887                "mismatch at point {:?}",
 888                point
 889            );
 890            assert_eq!(
 891                chunk.offset_to_offset_utf16(offset),
 892                offset_utf16,
 893                "mismatch at offset {}",
 894                offset
 895            );
 896            assert_eq!(
 897                chunk.offset_utf16_to_offset(offset_utf16),
 898                offset,
 899                "mismatch at offset_utf16 {:?}",
 900                offset_utf16
 901            );
 902            assert_eq!(
 903                chunk.point_to_point_utf16(point),
 904                point_utf16,
 905                "mismatch at point {:?}",
 906                point
 907            );
 908            assert_eq!(
 909                chunk.point_utf16_to_offset(point_utf16, false),
 910                offset,
 911                "mismatch at point_utf16 {:?}",
 912                point_utf16
 913            );
 914            assert_eq!(
 915                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 916                point,
 917                "mismatch for unclipped_point_utf16_to_point at {:?}",
 918                point_utf16
 919            );
 920
 921            assert_eq!(
 922                chunk.clip_point(point, Bias::Left),
 923                point,
 924                "incorrect left clip at {:?}",
 925                point
 926            );
 927            assert_eq!(
 928                chunk.clip_point(point, Bias::Right),
 929                point,
 930                "incorrect right clip at {:?}",
 931                point
 932            );
 933
 934            for i in 1..c.len_utf8() {
 935                let test_point = Point::new(point.row, point.column + i as u32);
 936                assert_eq!(
 937                    chunk.clip_point(test_point, Bias::Left),
 938                    point,
 939                    "incorrect left clip within multi-byte char at {:?}",
 940                    test_point
 941                );
 942                assert_eq!(
 943                    chunk.clip_point(test_point, Bias::Right),
 944                    Point::new(point.row, point.column + c.len_utf8() as u32),
 945                    "incorrect right clip within multi-byte char at {:?}",
 946                    test_point
 947                );
 948            }
 949
 950            for i in 1..c.len_utf16() {
 951                let test_point = Unclipped(PointUtf16::new(
 952                    point_utf16.row,
 953                    point_utf16.column + i as u32,
 954                ));
 955                assert_eq!(
 956                    chunk.unclipped_point_utf16_to_point(test_point),
 957                    point,
 958                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 959                    test_point
 960                );
 961                assert_eq!(
 962                    chunk.clip_point_utf16(test_point, Bias::Left),
 963                    point_utf16,
 964                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 965                    test_point
 966                );
 967                assert_eq!(
 968                    chunk.clip_point_utf16(test_point, Bias::Right),
 969                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 970                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 971                    test_point
 972                );
 973
 974                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 975                assert_eq!(
 976                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 977                    offset_utf16,
 978                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 979                    test_offset
 980                );
 981                assert_eq!(
 982                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 983                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 984                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 985                    test_offset
 986                );
 987            }
 988
 989            if c == '\n' {
 990                point.row += 1;
 991                point.column = 0;
 992                point_utf16.row += 1;
 993                point_utf16.column = 0;
 994            } else {
 995                point.column += c.len_utf8() as u32;
 996                point_utf16.column += c.len_utf16() as u32;
 997            }
 998
 999            if c == '\t' {
1000                expected_tab_positions.push(TabPosition {
1001                    byte_offset: offset,
1002                    char_offset,
1003                });
1004            }
1005
1006            offset += c.len_utf8();
1007            offset_utf16.0 += c.len_utf16();
1008        }
1009
1010        let final_point = chunk.offset_to_point(offset);
1011        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
1012        assert_eq!(
1013            chunk.point_to_offset(point),
1014            offset,
1015            "mismatch at point {:?}",
1016            point
1017        );
1018        assert_eq!(
1019            chunk.offset_to_offset_utf16(offset),
1020            offset_utf16,
1021            "mismatch at offset {}",
1022            offset
1023        );
1024        assert_eq!(
1025            chunk.offset_utf16_to_offset(offset_utf16),
1026            offset,
1027            "mismatch at offset_utf16 {:?}",
1028            offset_utf16
1029        );
1030        assert_eq!(
1031            chunk.point_to_point_utf16(point),
1032            point_utf16,
1033            "mismatch at final point {:?}",
1034            point
1035        );
1036        assert_eq!(
1037            chunk.point_utf16_to_offset(point_utf16, false),
1038            offset,
1039            "mismatch at final point_utf16 {:?}",
1040            point_utf16
1041        );
1042        assert_eq!(
1043            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
1044            point,
1045            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
1046            point_utf16
1047        );
1048        assert_eq!(
1049            chunk.clip_point(point, Bias::Left),
1050            point,
1051            "incorrect left clip at final point {:?}",
1052            point
1053        );
1054        assert_eq!(
1055            chunk.clip_point(point, Bias::Right),
1056            point,
1057            "incorrect right clip at final point {:?}",
1058            point
1059        );
1060        assert_eq!(
1061            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1062            point_utf16,
1063            "incorrect left clip_point_utf16 at final point {:?}",
1064            point_utf16
1065        );
1066        assert_eq!(
1067            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1068            point_utf16,
1069            "incorrect right clip_point_utf16 at final point {:?}",
1070            point_utf16
1071        );
1072        assert_eq!(
1073            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1074            offset_utf16,
1075            "incorrect left clip_offset_utf16 at final offset {:?}",
1076            offset_utf16
1077        );
1078        assert_eq!(
1079            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1080            offset_utf16,
1081            "incorrect right clip_offset_utf16 at final offset {:?}",
1082            offset_utf16
1083        );
1084
1085        // Verify length methods
1086        assert_eq!(chunk.len(), text.len());
1087        assert_eq!(
1088            chunk.len_utf16().0,
1089            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1090        );
1091
1092        // Verify line counting
1093        let lines = chunk.lines();
1094        let mut newline_count = 0;
1095        let mut last_line_len = 0;
1096        for c in text.chars() {
1097            if c == '\n' {
1098                newline_count += 1;
1099                last_line_len = 0;
1100            } else {
1101                last_line_len += c.len_utf8() as u32;
1102            }
1103        }
1104        assert_eq!(lines, Point::new(newline_count, last_line_len));
1105
1106        // Verify first/last line chars
1107        if !text.is_empty() {
1108            let first_line = text.split('\n').next().unwrap();
1109            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1110
1111            let last_line = text.split('\n').next_back().unwrap();
1112            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1113            assert_eq!(
1114                chunk.last_line_len_utf16(),
1115                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1116            );
1117        }
1118
1119        // Verify longest row
1120        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1121        let mut max_chars = 0;
1122        let mut current_row = 0;
1123        let mut current_chars = 0;
1124        let mut max_row = 0;
1125
1126        for c in text.chars() {
1127            if c == '\n' {
1128                if current_chars > max_chars {
1129                    max_chars = current_chars;
1130                    max_row = current_row;
1131                }
1132                current_row += 1;
1133                current_chars = 0;
1134            } else {
1135                current_chars += 1;
1136            }
1137        }
1138
1139        if current_chars > max_chars {
1140            max_chars = current_chars;
1141            max_row = current_row;
1142        }
1143
1144        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1145        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1146    }
1147}