chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35#[inline(always)]
  36const fn saturating_shl_mask(offset: u32) -> Bitmap {
  37    (1 as Bitmap).unbounded_shl(offset).wrapping_sub(1)
  38}
  39
  40#[inline(always)]
  41const fn saturating_shr_mask(offset: u32) -> Bitmap {
  42    !Bitmap::MAX.unbounded_shr(offset)
  43}
  44
  45impl Chunk {
  46    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  47
  48    #[inline(always)]
  49    pub fn new(text: &str) -> Self {
  50        let mut this = Chunk::default();
  51        this.push_str(text);
  52        this
  53    }
  54
  55    #[inline(always)]
  56    pub fn push_str(&mut self, text: &str) {
  57        for (char_ix, c) in text.char_indices() {
  58            let ix = self.text.len() + char_ix;
  59            self.chars |= 1 << ix;
  60            self.chars_utf16 |= 1 << ix;
  61            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  62            self.newlines |= ((c == '\n') as Bitmap) << ix;
  63            self.tabs |= ((c == '\t') as Bitmap) << ix;
  64        }
  65        self.text.push_str(text);
  66    }
  67
  68    #[inline(always)]
  69    pub fn append(&mut self, slice: ChunkSlice) {
  70        if slice.is_empty() {
  71            return;
  72        };
  73
  74        let base_ix = self.text.len();
  75        self.chars |= slice.chars << base_ix;
  76        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  77        self.newlines |= slice.newlines << base_ix;
  78        self.tabs |= slice.tabs << base_ix;
  79        self.text.push_str(slice.text);
  80    }
  81
  82    #[inline(always)]
  83    pub fn as_slice(&self) -> ChunkSlice<'_> {
  84        ChunkSlice {
  85            chars: self.chars,
  86            chars_utf16: self.chars_utf16,
  87            newlines: self.newlines,
  88            tabs: self.tabs,
  89            text: &self.text,
  90        }
  91    }
  92
  93    #[inline(always)]
  94    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  95        self.as_slice().slice(range)
  96    }
  97
  98    #[inline(always)]
  99    pub fn chars(&self) -> Bitmap {
 100        self.chars
 101    }
 102
 103    pub fn tabs(&self) -> Bitmap {
 104        self.tabs
 105    }
 106
 107    #[inline(always)]
 108    pub fn is_char_boundary(&self, offset: usize) -> bool {
 109        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 110    }
 111
 112    pub fn floor_char_boundary(&self, index: usize) -> usize {
 113        if index >= self.text.len() {
 114            self.text.len()
 115        } else {
 116            let mut i = index;
 117            while i > 0 {
 118                if util::is_utf8_char_boundary(self.text.as_bytes()[i]) {
 119                    break;
 120                }
 121                i -= 1;
 122            }
 123
 124            i
 125        }
 126    }
 127
 128    #[track_caller]
 129    #[inline(always)]
 130    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
 131        if self.is_char_boundary(offset) {
 132            return true;
 133        }
 134        if PANIC {
 135            panic_char_boundary(&self.text, offset);
 136        } else {
 137            log_err_char_boundary(&self.text, offset);
 138            false
 139        }
 140    }
 141}
 142
 143#[derive(Clone, Copy, Debug)]
 144pub struct ChunkSlice<'a> {
 145    chars: Bitmap,
 146    chars_utf16: Bitmap,
 147    newlines: Bitmap,
 148    tabs: Bitmap,
 149    text: &'a str,
 150}
 151
 152impl Into<Chunk> for ChunkSlice<'_> {
 153    fn into(self) -> Chunk {
 154        Chunk {
 155            chars: self.chars,
 156            chars_utf16: self.chars_utf16,
 157            newlines: self.newlines,
 158            tabs: self.tabs,
 159            text: self.text.try_into().unwrap(),
 160        }
 161    }
 162}
 163
 164impl<'a> ChunkSlice<'a> {
 165    #[inline(always)]
 166    pub fn is_empty(&self) -> bool {
 167        self.text.is_empty()
 168    }
 169
 170    #[inline(always)]
 171    pub fn is_char_boundary(&self, offset: usize) -> bool {
 172        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 173    }
 174
 175    #[inline(always)]
 176    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 177        if mid == MAX_BASE {
 178            let left = self;
 179            let right = ChunkSlice {
 180                chars: 0,
 181                chars_utf16: 0,
 182                newlines: 0,
 183                tabs: 0,
 184                text: "",
 185            };
 186            (left, right)
 187        } else {
 188            let mask = ((1 as Bitmap) << mid) - 1;
 189            let (left_text, right_text) = self.text.split_at(mid);
 190            let left = ChunkSlice {
 191                chars: self.chars & mask,
 192                chars_utf16: self.chars_utf16 & mask,
 193                newlines: self.newlines & mask,
 194                tabs: self.tabs & mask,
 195                text: left_text,
 196            };
 197            let right = ChunkSlice {
 198                chars: self.chars >> mid,
 199                chars_utf16: self.chars_utf16 >> mid,
 200                newlines: self.newlines >> mid,
 201                tabs: self.tabs >> mid,
 202                text: right_text,
 203            };
 204            (left, right)
 205        }
 206    }
 207
 208    #[inline(always)]
 209    pub fn slice(self, mut range: Range<usize>) -> Self {
 210        if range.start == MAX_BASE {
 211            Self {
 212                chars: 0,
 213                chars_utf16: 0,
 214                newlines: 0,
 215                tabs: 0,
 216                text: "",
 217            }
 218        } else {
 219            if !self.assert_char_boundary::<false>(range.start) {
 220                range.start = self.text.ceil_char_boundary(range.start);
 221            }
 222            if !self.assert_char_boundary::<false>(range.end) {
 223                range.end = if range.end < range.start {
 224                    range.start
 225                } else {
 226                    self.text.floor_char_boundary(range.end)
 227                };
 228            }
 229            let mask = (1 as Bitmap)
 230                .unbounded_shl(range.end as u32)
 231                .wrapping_sub(1);
 232            Self {
 233                chars: (self.chars & mask) >> range.start,
 234                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 235                newlines: (self.newlines & mask) >> range.start,
 236                tabs: (self.tabs & mask) >> range.start,
 237                text: &self.text[range],
 238            }
 239        }
 240    }
 241
 242    #[inline(always)]
 243    pub fn text_summary(&self) -> TextSummary {
 244        let mut chars = 0;
 245        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 246        TextSummary {
 247            len: self.len(),
 248            chars,
 249            len_utf16: self.len_utf16(),
 250            lines: self.lines(),
 251            first_line_chars: self.first_line_chars(),
 252            last_line_chars: self.last_line_chars(),
 253            last_line_len_utf16: self.last_line_len_utf16(),
 254            longest_row,
 255            longest_row_chars,
 256        }
 257    }
 258
 259    /// Get length in bytes
 260    #[inline(always)]
 261    pub fn len(&self) -> usize {
 262        self.text.len()
 263    }
 264
 265    /// Get length in UTF-16 code units
 266    #[inline(always)]
 267    pub fn len_utf16(&self) -> OffsetUtf16 {
 268        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 269    }
 270
 271    /// Get point representing number of lines and length of last line
 272    #[inline(always)]
 273    pub fn lines(&self) -> Point {
 274        let row = self.newlines.count_ones();
 275        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 276        Point::new(row, column)
 277    }
 278
 279    /// Get number of chars in first line
 280    #[inline(always)]
 281    pub fn first_line_chars(&self) -> u32 {
 282        (self.chars & saturating_shl_mask(self.newlines.trailing_zeros())).count_ones()
 283    }
 284
 285    /// Get number of chars in last line
 286    #[inline(always)]
 287    pub fn last_line_chars(&self) -> u32 {
 288        (self.chars & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 289    }
 290
 291    /// Get number of UTF-16 code units in last line
 292    #[inline(always)]
 293    pub fn last_line_len_utf16(&self) -> u32 {
 294        (self.chars_utf16 & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 295    }
 296
 297    /// Get the longest row in the chunk and its length in characters.
 298    /// Calculate the total number of characters in the chunk along the way.
 299    #[inline(always)]
 300    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 301        let mut chars = self.chars;
 302        let mut newlines = self.newlines;
 303        *total_chars = 0;
 304        let mut row = 0;
 305        let mut longest_row = 0;
 306        let mut longest_row_chars = 0;
 307        while newlines > 0 {
 308            let newline_ix = newlines.trailing_zeros();
 309            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 310            *total_chars += usize::from(row_chars);
 311            if row_chars > longest_row_chars {
 312                longest_row = row;
 313                longest_row_chars = row_chars;
 314            }
 315
 316            newlines >>= newline_ix;
 317            newlines >>= 1;
 318            chars >>= newline_ix;
 319            chars >>= 1;
 320            row += 1;
 321            *total_chars += 1;
 322        }
 323
 324        let row_chars = chars.count_ones() as u8;
 325        *total_chars += usize::from(row_chars);
 326        if row_chars > longest_row_chars {
 327            (row, row_chars as u32)
 328        } else {
 329            (longest_row, longest_row_chars as u32)
 330        }
 331    }
 332
 333    #[inline(always)]
 334    pub fn offset_to_point(&self, offset: usize) -> Point {
 335        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 336        let row = (self.newlines & mask).count_ones();
 337        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 338        let column = (offset - newline_ix as usize) as u32;
 339        Point::new(row, column)
 340    }
 341
 342    #[inline(always)]
 343    pub fn point_to_offset(&self, point: Point) -> usize {
 344        if point.row > self.lines().row {
 345            debug_panic!(
 346                "point {:?} extends beyond rows for string {:?}",
 347                point,
 348                self.text
 349            );
 350            return self.len();
 351        }
 352
 353        let row_offset_range = self.offset_range_for_row(point.row);
 354        if point.column > row_offset_range.len() as u32 {
 355            debug_panic!(
 356                "point {:?} extends beyond row for string {:?}",
 357                point,
 358                self.text
 359            );
 360            row_offset_range.end
 361        } else {
 362            row_offset_range.start + point.column as usize
 363        }
 364    }
 365
 366    #[track_caller]
 367    #[inline(always)]
 368    pub fn assert_char_boundary<const PANIC: bool>(&self, offset: usize) -> bool {
 369        if self.is_char_boundary(offset) {
 370            return true;
 371        }
 372        if PANIC {
 373            panic_char_boundary(self.text, offset);
 374        } else {
 375            log_err_char_boundary(self.text, offset);
 376            false
 377        }
 378    }
 379
 380    pub fn floor_char_boundary(&self, index: usize) -> usize {
 381        self.text.floor_char_boundary(index)
 382    }
 383
 384    #[inline(always)]
 385    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 386        if point.row > self.lines().row {
 387            debug_panic!(
 388                "point {:?} extends beyond rows for string {:?}",
 389                point,
 390                self.text
 391            );
 392            return self.len_utf16();
 393        }
 394        self.offset_to_offset_utf16(self.point_to_offset(point))
 395    }
 396
 397    #[inline(always)]
 398    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 399        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 400        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 401    }
 402
 403    #[inline(always)]
 404    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 405        if target.0 == 0 {
 406            0
 407        } else {
 408            #[cfg(not(test))]
 409            let chars_utf16 = self.chars_utf16;
 410            #[cfg(test)]
 411            let chars_utf16 = self.chars_utf16 as u128;
 412            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 413            if ix == MAX_BASE {
 414                MAX_BASE
 415            } else {
 416                let utf8_additional_len = cmp::min(
 417                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 418                    self.text.len() - ix,
 419                );
 420                ix + utf8_additional_len
 421            }
 422        }
 423    }
 424
 425    #[inline(always)]
 426    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 427        let mask = saturating_shl_mask(offset as u32);
 428        let row = (self.newlines & saturating_shl_mask(offset as u32)).count_ones();
 429        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 430        let column = if newline_ix as usize == MAX_BASE {
 431            0
 432        } else {
 433            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 434        };
 435        PointUtf16::new(row, column)
 436    }
 437
 438    #[inline(always)]
 439    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 440        self.offset_to_point_utf16(self.point_to_offset(point))
 441    }
 442
 443    #[inline(always)]
 444    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 445        let lines = self.lines();
 446        if point.row > lines.row {
 447            if !clip {
 448                debug_panic!(
 449                    "point {:?} is beyond this chunk's extent {:?}",
 450                    point,
 451                    self.text
 452                );
 453            }
 454            return self.len();
 455        }
 456
 457        let row_offset_range = self.offset_range_for_row(point.row);
 458        let line = self.slice(row_offset_range.clone());
 459        if point.column > line.last_line_len_utf16() {
 460            if !clip {
 461                debug_panic!(
 462                    "point {:?} is beyond the end of the line in chunk {:?}",
 463                    point,
 464                    self.text
 465                );
 466            }
 467            return line.len();
 468        }
 469
 470        let mut offset = row_offset_range.start;
 471        if point.column > 0 {
 472            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 473            if !self.text.is_char_boundary(offset) {
 474                offset -= 1;
 475                while !self.text.is_char_boundary(offset) {
 476                    offset -= 1;
 477                }
 478                if !clip {
 479                    debug_panic!(
 480                        "point {:?} is within character in chunk {:?}",
 481                        point,
 482                        self.text,
 483                    );
 484                }
 485            }
 486        }
 487        offset
 488    }
 489
 490    #[inline(always)]
 491    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 492        let max_point = self.lines();
 493        if point.0.row > max_point.row {
 494            return max_point;
 495        }
 496
 497        let row_offset_range = self.offset_range_for_row(point.0.row);
 498        let line = self.slice(row_offset_range);
 499        if point.0.column == 0 {
 500            Point::new(point.0.row, 0)
 501        } else if point.0.column >= line.len_utf16().0 as u32 {
 502            Point::new(point.0.row, line.len() as u32)
 503        } else {
 504            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 505            while !line.text.is_char_boundary(column) {
 506                column -= 1;
 507            }
 508            Point::new(point.0.row, column as u32)
 509        }
 510    }
 511
 512    #[inline(always)]
 513    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 514        let max_point = self.lines();
 515        if point.row > max_point.row {
 516            return max_point;
 517        }
 518
 519        let line = self.slice(self.offset_range_for_row(point.row));
 520        if point.column == 0 {
 521            point
 522        } else if point.column >= line.len() as u32 {
 523            Point::new(point.row, line.len() as u32)
 524        } else {
 525            let mut column = point.column as usize;
 526            let bytes = line.text.as_bytes();
 527            if bytes[column - 1] < 128 && bytes[column] < 128 {
 528                return Point::new(point.row, column as u32);
 529            }
 530
 531            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 532            loop {
 533                if line.is_char_boundary(column)
 534                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 535                {
 536                    break;
 537                }
 538
 539                match bias {
 540                    Bias::Left => column -= 1,
 541                    Bias::Right => column += 1,
 542                }
 543                grapheme_cursor.set_cursor(column);
 544            }
 545            Point::new(point.row, column as u32)
 546        }
 547    }
 548
 549    #[inline(always)]
 550    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 551        let max_point = self.lines();
 552        if point.0.row > max_point.row {
 553            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 554        } else {
 555            let line = self.slice(self.offset_range_for_row(point.0.row));
 556            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 557            PointUtf16::new(point.0.row, column.0 as u32)
 558        }
 559    }
 560
 561    #[inline(always)]
 562    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 563        if target == OffsetUtf16::default() {
 564            OffsetUtf16::default()
 565        } else if target >= self.len_utf16() {
 566            self.len_utf16()
 567        } else {
 568            let mut offset = self.offset_utf16_to_offset(target);
 569            while !self.text.is_char_boundary(offset) {
 570                if bias == Bias::Left {
 571                    offset -= 1;
 572                } else {
 573                    offset += 1;
 574                }
 575            }
 576            self.offset_to_offset_utf16(offset)
 577        }
 578    }
 579
 580    #[inline(always)]
 581    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 582        let row_start = if row > 0 {
 583            #[cfg(not(test))]
 584            let newlines = self.newlines;
 585            #[cfg(test)]
 586            let newlines = self.newlines as u128;
 587            nth_set_bit(newlines, row as usize) + 1
 588        } else {
 589            0
 590        };
 591        let row_len = if row_start == MAX_BASE {
 592            0
 593        } else {
 594            cmp::min(
 595                (self.newlines >> row_start).trailing_zeros(),
 596                (self.text.len() - row_start) as u32,
 597            )
 598        };
 599        row_start..row_start + row_len as usize
 600    }
 601
 602    #[inline(always)]
 603    pub fn tabs(&self) -> Tabs {
 604        Tabs {
 605            tabs: self.tabs,
 606            chars: self.chars,
 607        }
 608    }
 609}
 610
 611pub struct Tabs {
 612    tabs: Bitmap,
 613    chars: Bitmap,
 614}
 615
 616#[derive(Debug, PartialEq, Eq)]
 617pub struct TabPosition {
 618    pub byte_offset: usize,
 619    pub char_offset: usize,
 620}
 621
 622impl Iterator for Tabs {
 623    type Item = TabPosition;
 624
 625    fn next(&mut self) -> Option<Self::Item> {
 626        if self.tabs == 0 {
 627            return None;
 628        }
 629
 630        let tab_offset = self.tabs.trailing_zeros() as usize;
 631        let chars_mask = (1 << tab_offset) - 1;
 632        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 633
 634        // Since tabs are 1 byte the tab offset is the same as the byte offset
 635        let position = TabPosition {
 636            byte_offset: tab_offset,
 637            char_offset,
 638        };
 639        // Remove the tab we've just seen
 640        self.tabs ^= 1 << tab_offset;
 641
 642        Some(position)
 643    }
 644}
 645
 646/// Finds the n-th bit that is set to 1.
 647#[inline(always)]
 648fn nth_set_bit(v: u128, n: usize) -> usize {
 649    let low = v as u64;
 650    let high = (v >> 64) as u64;
 651
 652    let low_count = low.count_ones() as usize;
 653    if n > low_count {
 654        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 655    } else {
 656        nth_set_bit_u64(low, n as u64) as usize
 657    }
 658}
 659
 660#[cold]
 661#[inline(never)]
 662#[track_caller]
 663fn panic_char_boundary(text: &str, offset: usize) -> ! {
 664    if offset > text.len() {
 665        panic!(
 666            "byte index {} is out of bounds of `{:?}` (length: {})",
 667            offset,
 668            text,
 669            text.len()
 670        );
 671    }
 672    // find the character
 673    let char_start = text.floor_char_boundary(offset);
 674    // `char_start` must be less than len and a char boundary
 675    let ch = text.get(char_start..).unwrap().chars().next().unwrap();
 676    let char_range = char_start..char_start + ch.len_utf8();
 677    panic!(
 678        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 679        offset, ch, char_range,
 680    );
 681}
 682
 683#[cold]
 684#[inline(never)]
 685#[track_caller]
 686fn log_err_char_boundary(text: &str, offset: usize) {
 687    if offset > text.len() {
 688        log::error!(
 689            "byte index {} is out of bounds of `{:?}` (length: {})",
 690            offset,
 691            text,
 692            text.len()
 693        );
 694    }
 695    // find the character
 696    let char_start = text.floor_char_boundary(offset);
 697    // `char_start` must be less than len and a char boundary
 698    let ch = text.get(char_start..).unwrap().chars().next().unwrap();
 699    let char_range = char_start..char_start + ch.len_utf8();
 700    log::error!(
 701        "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 702        offset,
 703        ch,
 704        char_range,
 705    );
 706}
 707
 708#[inline(always)]
 709fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 710    let v = v.reverse_bits();
 711    let mut s: u64 = 64;
 712
 713    // Parallel bit count intermediates
 714    let a = v - ((v >> 1) & (u64::MAX / 3));
 715    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 716    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 717    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 718
 719    // Branchless select
 720    let t = (d >> 32) + (d >> 48);
 721    s -= (t.wrapping_sub(n) & 256) >> 3;
 722    n -= t & (t.wrapping_sub(n) >> 8);
 723
 724    let t = (d >> (s - 16)) & 0xff;
 725    s -= (t.wrapping_sub(n) & 256) >> 4;
 726    n -= t & (t.wrapping_sub(n) >> 8);
 727
 728    let t = (c >> (s - 8)) & 0xf;
 729    s -= (t.wrapping_sub(n) & 256) >> 5;
 730    n -= t & (t.wrapping_sub(n) >> 8);
 731
 732    let t = (b >> (s - 4)) & 0x7;
 733    s -= (t.wrapping_sub(n) & 256) >> 6;
 734    n -= t & (t.wrapping_sub(n) >> 8);
 735
 736    let t = (a >> (s - 2)) & 0x3;
 737    s -= (t.wrapping_sub(n) & 256) >> 7;
 738    n -= t & (t.wrapping_sub(n) >> 8);
 739
 740    let t = (v >> (s - 1)) & 0x1;
 741    s -= (t.wrapping_sub(n) & 256) >> 8;
 742
 743    65 - s - 1
 744}
 745
 746#[cfg(test)]
 747mod tests {
 748    use super::*;
 749    use rand::prelude::*;
 750    use util::RandomCharIter;
 751
 752    #[gpui::test(iterations = 100)]
 753    fn test_random_chunks(mut rng: StdRng) {
 754        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 755        log::info!("Chunk: {:?}", text);
 756        let chunk = Chunk::new(&text);
 757        verify_chunk(chunk.as_slice(), &text);
 758
 759        // Verify Chunk::chars() bitmap
 760        let expected_chars = char_offsets(&text)
 761            .into_iter()
 762            .inspect(|i| assert!(*i < MAX_BASE))
 763            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 764        assert_eq!(chunk.chars(), expected_chars);
 765
 766        for _ in 0..10 {
 767            let mut start = rng.random_range(0..=chunk.text.len());
 768            let mut end = rng.random_range(start..=chunk.text.len());
 769            while !chunk.text.is_char_boundary(start) {
 770                start -= 1;
 771            }
 772            while !chunk.text.is_char_boundary(end) {
 773                end -= 1;
 774            }
 775            let range = start..end;
 776            log::info!("Range: {:?}", range);
 777            let text_slice = &text[range.clone()];
 778            let chunk_slice = chunk.slice(range);
 779            verify_chunk(chunk_slice, text_slice);
 780        }
 781    }
 782
 783    #[gpui::test(iterations = 100)]
 784    fn test_split_chunk_slice(mut rng: StdRng) {
 785        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 786        let chunk = Chunk::new(text);
 787        let offset = char_offsets_with_end(text)
 788            .into_iter()
 789            .choose(&mut rng)
 790            .unwrap();
 791        let (a, b) = chunk.as_slice().split_at(offset);
 792        let (a_str, b_str) = text.split_at(offset);
 793        verify_chunk(a, a_str);
 794        verify_chunk(b, b_str);
 795    }
 796
 797    #[gpui::test(iterations = 1000)]
 798    fn test_nth_set_bit_random(mut rng: StdRng) {
 799        let set_count = rng.random_range(0..=128);
 800        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 801        set_bits.sort();
 802        let mut n = 0;
 803        for ix in set_bits.iter().copied() {
 804            n |= 1 << ix;
 805        }
 806
 807        for (mut ix, position) in set_bits.into_iter().enumerate() {
 808            ix += 1;
 809            assert_eq!(
 810                nth_set_bit(n, ix),
 811                position,
 812                "nth_set_bit({:0128b}, {})",
 813                n,
 814                ix
 815            );
 816        }
 817    }
 818
 819    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 820    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 821        let mut str = String::new();
 822        let mut chars = RandomCharIter::new(rng);
 823        loop {
 824            let ch = chars.next().unwrap();
 825            if str.len() + ch.len_utf8() > len {
 826                break;
 827            }
 828            str.push(ch);
 829        }
 830        str
 831    }
 832
 833    #[gpui::test(iterations = 1000)]
 834    fn test_append_random_strings(mut rng: StdRng) {
 835        let len1 = rng.random_range(0..=MAX_BASE);
 836        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 837        let str1 = random_string_with_utf8_len(&mut rng, len1);
 838        let str2 = random_string_with_utf8_len(&mut rng, len2);
 839        let mut chunk1 = Chunk::new(&str1);
 840        let chunk2 = Chunk::new(&str2);
 841        let char_offsets = char_offsets_with_end(&str2);
 842        let start_index = rng.random_range(0..char_offsets.len());
 843        let start_offset = char_offsets[start_index];
 844        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 845        chunk1.append(chunk2.slice(start_offset..end_offset));
 846        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 847    }
 848
 849    /// Return the byte offsets for each character in a string.
 850    ///
 851    /// These are valid offsets to split the string.
 852    fn char_offsets(text: &str) -> Vec<usize> {
 853        text.char_indices().map(|(i, _c)| i).collect()
 854    }
 855
 856    /// Return the byte offsets for each character in a string, plus the offset
 857    /// past the end of the string.
 858    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 859        let mut v = char_offsets(text);
 860        v.push(text.len());
 861        v
 862    }
 863
 864    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 865        let mut offset = 0;
 866        let mut offset_utf16 = OffsetUtf16(0);
 867        let mut point = Point::zero();
 868        let mut point_utf16 = PointUtf16::zero();
 869
 870        log::info!("Verifying chunk {:?}", text);
 871        assert_eq!(chunk.offset_to_point(0), Point::zero());
 872
 873        let mut expected_tab_positions = Vec::new();
 874
 875        for (char_offset, c) in text.chars().enumerate() {
 876            let expected_point = chunk.offset_to_point(offset);
 877            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 878            assert_eq!(
 879                chunk.point_to_offset(point),
 880                offset,
 881                "mismatch at point {:?}",
 882                point
 883            );
 884            assert_eq!(
 885                chunk.offset_to_offset_utf16(offset),
 886                offset_utf16,
 887                "mismatch at offset {}",
 888                offset
 889            );
 890            assert_eq!(
 891                chunk.offset_utf16_to_offset(offset_utf16),
 892                offset,
 893                "mismatch at offset_utf16 {:?}",
 894                offset_utf16
 895            );
 896            assert_eq!(
 897                chunk.point_to_point_utf16(point),
 898                point_utf16,
 899                "mismatch at point {:?}",
 900                point
 901            );
 902            assert_eq!(
 903                chunk.point_utf16_to_offset(point_utf16, false),
 904                offset,
 905                "mismatch at point_utf16 {:?}",
 906                point_utf16
 907            );
 908            assert_eq!(
 909                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 910                point,
 911                "mismatch for unclipped_point_utf16_to_point at {:?}",
 912                point_utf16
 913            );
 914
 915            assert_eq!(
 916                chunk.clip_point(point, Bias::Left),
 917                point,
 918                "incorrect left clip at {:?}",
 919                point
 920            );
 921            assert_eq!(
 922                chunk.clip_point(point, Bias::Right),
 923                point,
 924                "incorrect right clip at {:?}",
 925                point
 926            );
 927
 928            for i in 1..c.len_utf8() {
 929                let test_point = Point::new(point.row, point.column + i as u32);
 930                assert_eq!(
 931                    chunk.clip_point(test_point, Bias::Left),
 932                    point,
 933                    "incorrect left clip within multi-byte char at {:?}",
 934                    test_point
 935                );
 936                assert_eq!(
 937                    chunk.clip_point(test_point, Bias::Right),
 938                    Point::new(point.row, point.column + c.len_utf8() as u32),
 939                    "incorrect right clip within multi-byte char at {:?}",
 940                    test_point
 941                );
 942            }
 943
 944            for i in 1..c.len_utf16() {
 945                let test_point = Unclipped(PointUtf16::new(
 946                    point_utf16.row,
 947                    point_utf16.column + i as u32,
 948                ));
 949                assert_eq!(
 950                    chunk.unclipped_point_utf16_to_point(test_point),
 951                    point,
 952                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 953                    test_point
 954                );
 955                assert_eq!(
 956                    chunk.clip_point_utf16(test_point, Bias::Left),
 957                    point_utf16,
 958                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 959                    test_point
 960                );
 961                assert_eq!(
 962                    chunk.clip_point_utf16(test_point, Bias::Right),
 963                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 964                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 965                    test_point
 966                );
 967
 968                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 969                assert_eq!(
 970                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 971                    offset_utf16,
 972                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 973                    test_offset
 974                );
 975                assert_eq!(
 976                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 977                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 978                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 979                    test_offset
 980                );
 981            }
 982
 983            if c == '\n' {
 984                point.row += 1;
 985                point.column = 0;
 986                point_utf16.row += 1;
 987                point_utf16.column = 0;
 988            } else {
 989                point.column += c.len_utf8() as u32;
 990                point_utf16.column += c.len_utf16() as u32;
 991            }
 992
 993            if c == '\t' {
 994                expected_tab_positions.push(TabPosition {
 995                    byte_offset: offset,
 996                    char_offset,
 997                });
 998            }
 999
1000            offset += c.len_utf8();
1001            offset_utf16.0 += c.len_utf16();
1002        }
1003
1004        let final_point = chunk.offset_to_point(offset);
1005        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
1006        assert_eq!(
1007            chunk.point_to_offset(point),
1008            offset,
1009            "mismatch at point {:?}",
1010            point
1011        );
1012        assert_eq!(
1013            chunk.offset_to_offset_utf16(offset),
1014            offset_utf16,
1015            "mismatch at offset {}",
1016            offset
1017        );
1018        assert_eq!(
1019            chunk.offset_utf16_to_offset(offset_utf16),
1020            offset,
1021            "mismatch at offset_utf16 {:?}",
1022            offset_utf16
1023        );
1024        assert_eq!(
1025            chunk.point_to_point_utf16(point),
1026            point_utf16,
1027            "mismatch at final point {:?}",
1028            point
1029        );
1030        assert_eq!(
1031            chunk.point_utf16_to_offset(point_utf16, false),
1032            offset,
1033            "mismatch at final point_utf16 {:?}",
1034            point_utf16
1035        );
1036        assert_eq!(
1037            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
1038            point,
1039            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
1040            point_utf16
1041        );
1042        assert_eq!(
1043            chunk.clip_point(point, Bias::Left),
1044            point,
1045            "incorrect left clip at final point {:?}",
1046            point
1047        );
1048        assert_eq!(
1049            chunk.clip_point(point, Bias::Right),
1050            point,
1051            "incorrect right clip at final point {:?}",
1052            point
1053        );
1054        assert_eq!(
1055            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1056            point_utf16,
1057            "incorrect left clip_point_utf16 at final point {:?}",
1058            point_utf16
1059        );
1060        assert_eq!(
1061            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1062            point_utf16,
1063            "incorrect right clip_point_utf16 at final point {:?}",
1064            point_utf16
1065        );
1066        assert_eq!(
1067            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1068            offset_utf16,
1069            "incorrect left clip_offset_utf16 at final offset {:?}",
1070            offset_utf16
1071        );
1072        assert_eq!(
1073            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1074            offset_utf16,
1075            "incorrect right clip_offset_utf16 at final offset {:?}",
1076            offset_utf16
1077        );
1078
1079        // Verify length methods
1080        assert_eq!(chunk.len(), text.len());
1081        assert_eq!(
1082            chunk.len_utf16().0,
1083            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1084        );
1085
1086        // Verify line counting
1087        let lines = chunk.lines();
1088        let mut newline_count = 0;
1089        let mut last_line_len = 0;
1090        for c in text.chars() {
1091            if c == '\n' {
1092                newline_count += 1;
1093                last_line_len = 0;
1094            } else {
1095                last_line_len += c.len_utf8() as u32;
1096            }
1097        }
1098        assert_eq!(lines, Point::new(newline_count, last_line_len));
1099
1100        // Verify first/last line chars
1101        if !text.is_empty() {
1102            let first_line = text.split('\n').next().unwrap();
1103            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1104
1105            let last_line = text.split('\n').next_back().unwrap();
1106            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1107            assert_eq!(
1108                chunk.last_line_len_utf16(),
1109                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1110            );
1111        }
1112
1113        // Verify longest row
1114        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1115        let mut max_chars = 0;
1116        let mut current_row = 0;
1117        let mut current_chars = 0;
1118        let mut max_row = 0;
1119
1120        for c in text.chars() {
1121            if c == '\n' {
1122                if current_chars > max_chars {
1123                    max_chars = current_chars;
1124                    max_row = current_row;
1125                }
1126                current_row += 1;
1127                current_chars = 0;
1128            } else {
1129                current_chars += 1;
1130            }
1131        }
1132
1133        if current_chars > max_chars {
1134            max_chars = current_chars;
1135            max_row = current_row;
1136        }
1137
1138        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1139        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1140    }
1141}