chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35impl Chunk {
  36    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  37
  38    #[inline(always)]
  39    pub fn new(text: &str) -> Self {
  40        let mut this = Chunk::default();
  41        this.push_str(text);
  42        this
  43    }
  44
  45    #[inline(always)]
  46    pub fn push_str(&mut self, text: &str) {
  47        for (char_ix, c) in text.char_indices() {
  48            let ix = self.text.len() + char_ix;
  49            self.chars |= 1 << ix;
  50            self.chars_utf16 |= 1 << ix;
  51            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  52            self.newlines |= ((c == '\n') as Bitmap) << ix;
  53            self.tabs |= ((c == '\t') as Bitmap) << ix;
  54        }
  55        self.text.push_str(text);
  56    }
  57
  58    #[inline(always)]
  59    pub fn append(&mut self, slice: ChunkSlice) {
  60        if slice.is_empty() {
  61            return;
  62        };
  63
  64        let base_ix = self.text.len();
  65        self.chars |= slice.chars << base_ix;
  66        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  67        self.newlines |= slice.newlines << base_ix;
  68        self.tabs |= slice.tabs << base_ix;
  69        self.text.push_str(slice.text);
  70    }
  71
  72    #[inline(always)]
  73    pub fn as_slice(&self) -> ChunkSlice<'_> {
  74        ChunkSlice {
  75            chars: self.chars,
  76            chars_utf16: self.chars_utf16,
  77            newlines: self.newlines,
  78            tabs: self.tabs,
  79            text: &self.text,
  80        }
  81    }
  82
  83    #[inline(always)]
  84    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  85        self.as_slice().slice(range)
  86    }
  87
  88    #[inline(always)]
  89    pub fn chars(&self) -> Bitmap {
  90        self.chars
  91    }
  92
  93    pub fn tabs(&self) -> Bitmap {
  94        self.tabs
  95    }
  96
  97    #[inline(always)]
  98    pub fn is_char_boundary(&self, offset: usize) -> bool {
  99        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 100    }
 101
 102    pub fn floor_char_boundary(&self, index: usize) -> usize {
 103        #[inline]
 104        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 105            // This is bit magic equivalent to: b < 128 || b >= 192
 106            (u8 as i8) >= -0x40
 107        }
 108
 109        if index >= self.text.len() {
 110            self.text.len()
 111        } else {
 112            let mut i = index;
 113            while i > 0 {
 114                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 115                    break;
 116                }
 117                i -= 1;
 118            }
 119
 120            i
 121        }
 122    }
 123
 124    #[track_caller]
 125    #[inline(always)]
 126    pub fn assert_char_boundary(&self, offset: usize) {
 127        if self.is_char_boundary(offset) {
 128            return;
 129        }
 130        panic_char_boundary(self, offset);
 131
 132        #[cold]
 133        #[inline(never)]
 134        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
 135            if offset > chunk.text.len() {
 136                panic!(
 137                    "byte index {} is out of bounds of `{:?}` (length: {})",
 138                    offset,
 139                    chunk.text,
 140                    chunk.text.len()
 141                );
 142            }
 143            // find the character
 144            let char_start = chunk.floor_char_boundary(offset);
 145            // `char_start` must be less than len and a char boundary
 146            let ch = chunk
 147                .text
 148                .get(char_start..)
 149                .unwrap()
 150                .chars()
 151                .next()
 152                .unwrap();
 153            let char_range = char_start..char_start + ch.len_utf8();
 154            panic!(
 155                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 156                offset, ch, char_range,
 157            );
 158        }
 159    }
 160}
 161
 162#[derive(Clone, Copy, Debug)]
 163pub struct ChunkSlice<'a> {
 164    chars: Bitmap,
 165    chars_utf16: Bitmap,
 166    newlines: Bitmap,
 167    tabs: Bitmap,
 168    text: &'a str,
 169}
 170
 171impl Into<Chunk> for ChunkSlice<'_> {
 172    fn into(self) -> Chunk {
 173        Chunk {
 174            chars: self.chars,
 175            chars_utf16: self.chars_utf16,
 176            newlines: self.newlines,
 177            tabs: self.tabs,
 178            text: self.text.try_into().unwrap(),
 179        }
 180    }
 181}
 182
 183impl<'a> ChunkSlice<'a> {
 184    #[inline(always)]
 185    pub fn is_empty(&self) -> bool {
 186        self.text.is_empty()
 187    }
 188
 189    #[inline(always)]
 190    pub fn is_char_boundary(&self, offset: usize) -> bool {
 191        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 192    }
 193
 194    #[inline(always)]
 195    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 196        if mid == MAX_BASE {
 197            let left = self;
 198            let right = ChunkSlice {
 199                chars: 0,
 200                chars_utf16: 0,
 201                newlines: 0,
 202                tabs: 0,
 203                text: "",
 204            };
 205            (left, right)
 206        } else {
 207            let mask = ((1 as Bitmap) << mid) - 1;
 208            let (left_text, right_text) = self.text.split_at(mid);
 209            let left = ChunkSlice {
 210                chars: self.chars & mask,
 211                chars_utf16: self.chars_utf16 & mask,
 212                newlines: self.newlines & mask,
 213                tabs: self.tabs & mask,
 214                text: left_text,
 215            };
 216            let right = ChunkSlice {
 217                chars: self.chars >> mid,
 218                chars_utf16: self.chars_utf16 >> mid,
 219                newlines: self.newlines >> mid,
 220                tabs: self.tabs >> mid,
 221                text: right_text,
 222            };
 223            (left, right)
 224        }
 225    }
 226
 227    #[inline(always)]
 228    pub fn slice(self, range: Range<usize>) -> Self {
 229        let mask = (1 as Bitmap)
 230            .unbounded_shl(range.end as u32)
 231            .wrapping_sub(1);
 232        if range.start == MAX_BASE {
 233            Self {
 234                chars: 0,
 235                chars_utf16: 0,
 236                newlines: 0,
 237                tabs: 0,
 238                text: "",
 239            }
 240        } else {
 241            self.assert_char_boundary(range.start);
 242            self.assert_char_boundary(range.end);
 243            Self {
 244                chars: (self.chars & mask) >> range.start,
 245                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 246                newlines: (self.newlines & mask) >> range.start,
 247                tabs: (self.tabs & mask) >> range.start,
 248                text: &self.text[range],
 249            }
 250        }
 251    }
 252
 253    #[inline(always)]
 254    pub fn text_summary(&self) -> TextSummary {
 255        let mut chars = 0;
 256        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 257        TextSummary {
 258            len: self.len(),
 259            chars,
 260            len_utf16: self.len_utf16(),
 261            lines: self.lines(),
 262            first_line_chars: self.first_line_chars(),
 263            last_line_chars: self.last_line_chars(),
 264            last_line_len_utf16: self.last_line_len_utf16(),
 265            longest_row,
 266            longest_row_chars,
 267        }
 268    }
 269
 270    /// Get length in bytes
 271    #[inline(always)]
 272    pub fn len(&self) -> usize {
 273        self.text.len()
 274    }
 275
 276    /// Get length in UTF-16 code units
 277    #[inline(always)]
 278    pub fn len_utf16(&self) -> OffsetUtf16 {
 279        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 280    }
 281
 282    /// Get point representing number of lines and length of last line
 283    #[inline(always)]
 284    pub fn lines(&self) -> Point {
 285        let row = self.newlines.count_ones();
 286        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 287        Point::new(row, column)
 288    }
 289
 290    /// Get number of chars in first line
 291    #[inline(always)]
 292    pub fn first_line_chars(&self) -> u32 {
 293        if self.newlines == 0 {
 294            self.chars.count_ones()
 295        } else {
 296            let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
 297            (self.chars & mask).count_ones()
 298        }
 299    }
 300
 301    /// Get number of chars in last line
 302    #[inline(always)]
 303    pub fn last_line_chars(&self) -> u32 {
 304        if self.newlines == 0 {
 305            self.chars.count_ones()
 306        } else {
 307            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 308            (self.chars & mask).count_ones()
 309        }
 310    }
 311
 312    /// Get number of UTF-16 code units in last line
 313    #[inline(always)]
 314    pub fn last_line_len_utf16(&self) -> u32 {
 315        if self.newlines == 0 {
 316            self.chars_utf16.count_ones()
 317        } else {
 318            let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
 319            (self.chars_utf16 & mask).count_ones()
 320        }
 321    }
 322
 323    /// Get the longest row in the chunk and its length in characters.
 324    /// Calculate the total number of characters in the chunk along the way.
 325    #[inline(always)]
 326    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 327        let mut chars = self.chars;
 328        let mut newlines = self.newlines;
 329        *total_chars = 0;
 330        let mut row = 0;
 331        let mut longest_row = 0;
 332        let mut longest_row_chars = 0;
 333        while newlines > 0 {
 334            let newline_ix = newlines.trailing_zeros();
 335            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 336            *total_chars += usize::from(row_chars);
 337            if row_chars > longest_row_chars {
 338                longest_row = row;
 339                longest_row_chars = row_chars;
 340            }
 341
 342            newlines >>= newline_ix;
 343            newlines >>= 1;
 344            chars >>= newline_ix;
 345            chars >>= 1;
 346            row += 1;
 347            *total_chars += 1;
 348        }
 349
 350        let row_chars = chars.count_ones() as u8;
 351        *total_chars += usize::from(row_chars);
 352        if row_chars > longest_row_chars {
 353            (row, row_chars as u32)
 354        } else {
 355            (longest_row, longest_row_chars as u32)
 356        }
 357    }
 358
 359    #[inline(always)]
 360    pub fn offset_to_point(&self, offset: usize) -> Point {
 361        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 362        let row = (self.newlines & mask).count_ones();
 363        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 364        let column = (offset - newline_ix as usize) as u32;
 365        Point::new(row, column)
 366    }
 367
 368    #[inline(always)]
 369    pub fn point_to_offset(&self, point: Point) -> usize {
 370        if point.row > self.lines().row {
 371            debug_panic!(
 372                "point {:?} extends beyond rows for string {:?}",
 373                point,
 374                self.text
 375            );
 376            return self.len();
 377        }
 378
 379        let row_offset_range = self.offset_range_for_row(point.row);
 380        if point.column > row_offset_range.len() as u32 {
 381            debug_panic!(
 382                "point {:?} extends beyond row for string {:?}",
 383                point,
 384                self.text
 385            );
 386            row_offset_range.end
 387        } else {
 388            row_offset_range.start + point.column as usize
 389        }
 390    }
 391
 392    #[track_caller]
 393    #[inline(always)]
 394    pub fn assert_char_boundary(&self, offset: usize) {
 395        if self.is_char_boundary(offset) {
 396            return;
 397        }
 398        panic_char_boundary(self, offset);
 399
 400        #[cold]
 401        #[inline(never)]
 402        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
 403            if offset > chunk.text.len() {
 404                panic!(
 405                    "byte index {} is out of bounds of `{:?}` (length: {})",
 406                    offset,
 407                    chunk.text,
 408                    chunk.text.len()
 409                );
 410            }
 411            // find the character
 412            let char_start = chunk.floor_char_boundary(offset);
 413            // `char_start` must be less than len and a char boundary
 414            let ch = chunk
 415                .text
 416                .get(char_start..)
 417                .unwrap()
 418                .chars()
 419                .next()
 420                .unwrap();
 421            let char_range = char_start..char_start + ch.len_utf8();
 422            panic!(
 423                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 424                offset, ch, char_range,
 425            );
 426        }
 427    }
 428
 429    pub fn floor_char_boundary(&self, index: usize) -> usize {
 430        #[inline]
 431        pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
 432            // This is bit magic equivalent to: b < 128 || b >= 192
 433            (u8 as i8) >= -0x40
 434        }
 435
 436        if index >= self.text.len() {
 437            self.text.len()
 438        } else {
 439            let mut i = index;
 440            while i > 0 {
 441                if is_utf8_char_boundary(self.text.as_bytes()[i]) {
 442                    break;
 443                }
 444                i -= 1;
 445            }
 446
 447            i
 448        }
 449    }
 450
 451    #[inline(always)]
 452    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 453        if point.row > self.lines().row {
 454            debug_panic!(
 455                "point {:?} extends beyond rows for string {:?}",
 456                point,
 457                self.text
 458            );
 459            return self.len_utf16();
 460        }
 461        self.offset_to_offset_utf16(self.point_to_offset(point))
 462    }
 463
 464    #[inline(always)]
 465    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 466        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 467        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 468    }
 469
 470    #[inline(always)]
 471    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 472        if target.0 == 0 {
 473            0
 474        } else {
 475            #[cfg(not(test))]
 476            let chars_utf16 = self.chars_utf16;
 477            #[cfg(test)]
 478            let chars_utf16 = self.chars_utf16 as u128;
 479            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 480            if ix == MAX_BASE {
 481                MAX_BASE
 482            } else {
 483                let utf8_additional_len = cmp::min(
 484                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 485                    self.text.len() - ix,
 486                );
 487                ix + utf8_additional_len
 488            }
 489        }
 490    }
 491
 492    #[inline(always)]
 493    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 494        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 495        let row = (self.newlines & mask).count_ones();
 496        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 497        let column = if newline_ix as usize == MAX_BASE {
 498            0
 499        } else {
 500            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 501        };
 502        PointUtf16::new(row, column)
 503    }
 504
 505    #[inline(always)]
 506    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 507        self.offset_to_point_utf16(self.point_to_offset(point))
 508    }
 509
 510    #[inline(always)]
 511    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 512        let lines = self.lines();
 513        if point.row > lines.row {
 514            if !clip {
 515                debug_panic!(
 516                    "point {:?} is beyond this chunk's extent {:?}",
 517                    point,
 518                    self.text
 519                );
 520            }
 521            return self.len();
 522        }
 523
 524        let row_offset_range = self.offset_range_for_row(point.row);
 525        let line = self.slice(row_offset_range.clone());
 526        if point.column > line.last_line_len_utf16() {
 527            if !clip {
 528                debug_panic!(
 529                    "point {:?} is beyond the end of the line in chunk {:?}",
 530                    point,
 531                    self.text
 532                );
 533            }
 534            return line.len();
 535        }
 536
 537        let mut offset = row_offset_range.start;
 538        if point.column > 0 {
 539            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 540            if !self.text.is_char_boundary(offset) {
 541                offset -= 1;
 542                while !self.text.is_char_boundary(offset) {
 543                    offset -= 1;
 544                }
 545                if !clip {
 546                    debug_panic!(
 547                        "point {:?} is within character in chunk {:?}",
 548                        point,
 549                        self.text,
 550                    );
 551                }
 552            }
 553        }
 554        offset
 555    }
 556
 557    #[inline(always)]
 558    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 559        let max_point = self.lines();
 560        if point.0.row > max_point.row {
 561            return max_point;
 562        }
 563
 564        let row_offset_range = self.offset_range_for_row(point.0.row);
 565        let line = self.slice(row_offset_range);
 566        if point.0.column == 0 {
 567            Point::new(point.0.row, 0)
 568        } else if point.0.column >= line.len_utf16().0 as u32 {
 569            Point::new(point.0.row, line.len() as u32)
 570        } else {
 571            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 572            while !line.text.is_char_boundary(column) {
 573                column -= 1;
 574            }
 575            Point::new(point.0.row, column as u32)
 576        }
 577    }
 578
 579    #[inline(always)]
 580    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 581        let max_point = self.lines();
 582        if point.row > max_point.row {
 583            return max_point;
 584        }
 585
 586        let line = self.slice(self.offset_range_for_row(point.row));
 587        if point.column == 0 {
 588            point
 589        } else if point.column >= line.len() as u32 {
 590            Point::new(point.row, line.len() as u32)
 591        } else {
 592            let mut column = point.column as usize;
 593            let bytes = line.text.as_bytes();
 594            if bytes[column - 1] < 128 && bytes[column] < 128 {
 595                return Point::new(point.row, column as u32);
 596            }
 597
 598            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 599            loop {
 600                if line.is_char_boundary(column)
 601                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 602                {
 603                    break;
 604                }
 605
 606                match bias {
 607                    Bias::Left => column -= 1,
 608                    Bias::Right => column += 1,
 609                }
 610                grapheme_cursor.set_cursor(column);
 611            }
 612            Point::new(point.row, column as u32)
 613        }
 614    }
 615
 616    #[inline(always)]
 617    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 618        let max_point = self.lines();
 619        if point.0.row > max_point.row {
 620            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 621        } else {
 622            let line = self.slice(self.offset_range_for_row(point.0.row));
 623            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 624            PointUtf16::new(point.0.row, column.0 as u32)
 625        }
 626    }
 627
 628    #[inline(always)]
 629    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 630        if target == OffsetUtf16::default() {
 631            OffsetUtf16::default()
 632        } else if target >= self.len_utf16() {
 633            self.len_utf16()
 634        } else {
 635            let mut offset = self.offset_utf16_to_offset(target);
 636            while !self.text.is_char_boundary(offset) {
 637                if bias == Bias::Left {
 638                    offset -= 1;
 639                } else {
 640                    offset += 1;
 641                }
 642            }
 643            self.offset_to_offset_utf16(offset)
 644        }
 645    }
 646
 647    #[inline(always)]
 648    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 649        let row_start = if row > 0 {
 650            #[cfg(not(test))]
 651            let newlines = self.newlines;
 652            #[cfg(test)]
 653            let newlines = self.newlines as u128;
 654            nth_set_bit(newlines, row as usize) + 1
 655        } else {
 656            0
 657        };
 658        let row_len = if row_start == MAX_BASE {
 659            0
 660        } else {
 661            cmp::min(
 662                (self.newlines >> row_start).trailing_zeros(),
 663                (self.text.len() - row_start) as u32,
 664            )
 665        };
 666        row_start..row_start + row_len as usize
 667    }
 668
 669    #[inline(always)]
 670    pub fn tabs(&self) -> Tabs {
 671        Tabs {
 672            tabs: self.tabs,
 673            chars: self.chars,
 674        }
 675    }
 676}
 677
 678pub struct Tabs {
 679    tabs: Bitmap,
 680    chars: Bitmap,
 681}
 682
 683#[derive(Debug, PartialEq, Eq)]
 684pub struct TabPosition {
 685    pub byte_offset: usize,
 686    pub char_offset: usize,
 687}
 688
 689impl Iterator for Tabs {
 690    type Item = TabPosition;
 691
 692    fn next(&mut self) -> Option<Self::Item> {
 693        if self.tabs == 0 {
 694            return None;
 695        }
 696
 697        let tab_offset = self.tabs.trailing_zeros() as usize;
 698        let chars_mask = (1 << tab_offset) - 1;
 699        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 700
 701        // Since tabs are 1 byte the tab offset is the same as the byte offset
 702        let position = TabPosition {
 703            byte_offset: tab_offset,
 704            char_offset,
 705        };
 706        // Remove the tab we've just seen
 707        self.tabs ^= 1 << tab_offset;
 708
 709        Some(position)
 710    }
 711}
 712
 713/// Finds the n-th bit that is set to 1.
 714#[inline(always)]
 715fn nth_set_bit(v: u128, n: usize) -> usize {
 716    let low = v as u64;
 717    let high = (v >> 64) as u64;
 718
 719    let low_count = low.count_ones() as usize;
 720    if n > low_count {
 721        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 722    } else {
 723        nth_set_bit_u64(low, n as u64) as usize
 724    }
 725}
 726
 727#[inline(always)]
 728fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 729    let v = v.reverse_bits();
 730    let mut s: u64 = 64;
 731
 732    // Parallel bit count intermediates
 733    let a = v - ((v >> 1) & (u64::MAX / 3));
 734    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 735    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 736    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 737
 738    // Branchless select
 739    let t = (d >> 32) + (d >> 48);
 740    s -= (t.wrapping_sub(n) & 256) >> 3;
 741    n -= t & (t.wrapping_sub(n) >> 8);
 742
 743    let t = (d >> (s - 16)) & 0xff;
 744    s -= (t.wrapping_sub(n) & 256) >> 4;
 745    n -= t & (t.wrapping_sub(n) >> 8);
 746
 747    let t = (c >> (s - 8)) & 0xf;
 748    s -= (t.wrapping_sub(n) & 256) >> 5;
 749    n -= t & (t.wrapping_sub(n) >> 8);
 750
 751    let t = (b >> (s - 4)) & 0x7;
 752    s -= (t.wrapping_sub(n) & 256) >> 6;
 753    n -= t & (t.wrapping_sub(n) >> 8);
 754
 755    let t = (a >> (s - 2)) & 0x3;
 756    s -= (t.wrapping_sub(n) & 256) >> 7;
 757    n -= t & (t.wrapping_sub(n) >> 8);
 758
 759    let t = (v >> (s - 1)) & 0x1;
 760    s -= (t.wrapping_sub(n) & 256) >> 8;
 761
 762    65 - s - 1
 763}
 764
 765#[cfg(test)]
 766mod tests {
 767    use super::*;
 768    use rand::prelude::*;
 769    use util::RandomCharIter;
 770
 771    #[gpui::test(iterations = 100)]
 772    fn test_random_chunks(mut rng: StdRng) {
 773        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 774        log::info!("Chunk: {:?}", text);
 775        let chunk = Chunk::new(&text);
 776        verify_chunk(chunk.as_slice(), &text);
 777
 778        // Verify Chunk::chars() bitmap
 779        let expected_chars = char_offsets(&text)
 780            .into_iter()
 781            .inspect(|i| assert!(*i < MAX_BASE))
 782            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 783        assert_eq!(chunk.chars(), expected_chars);
 784
 785        for _ in 0..10 {
 786            let mut start = rng.random_range(0..=chunk.text.len());
 787            let mut end = rng.random_range(start..=chunk.text.len());
 788            while !chunk.text.is_char_boundary(start) {
 789                start -= 1;
 790            }
 791            while !chunk.text.is_char_boundary(end) {
 792                end -= 1;
 793            }
 794            let range = start..end;
 795            log::info!("Range: {:?}", range);
 796            let text_slice = &text[range.clone()];
 797            let chunk_slice = chunk.slice(range);
 798            verify_chunk(chunk_slice, text_slice);
 799        }
 800    }
 801
 802    #[gpui::test(iterations = 100)]
 803    fn test_split_chunk_slice(mut rng: StdRng) {
 804        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 805        let chunk = Chunk::new(text);
 806        let offset = char_offsets_with_end(text)
 807            .into_iter()
 808            .choose(&mut rng)
 809            .unwrap();
 810        let (a, b) = chunk.as_slice().split_at(offset);
 811        let (a_str, b_str) = text.split_at(offset);
 812        verify_chunk(a, a_str);
 813        verify_chunk(b, b_str);
 814    }
 815
 816    #[gpui::test(iterations = 1000)]
 817    fn test_nth_set_bit_random(mut rng: StdRng) {
 818        let set_count = rng.random_range(0..=128);
 819        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 820        set_bits.sort();
 821        let mut n = 0;
 822        for ix in set_bits.iter().copied() {
 823            n |= 1 << ix;
 824        }
 825
 826        for (mut ix, position) in set_bits.into_iter().enumerate() {
 827            ix += 1;
 828            assert_eq!(
 829                nth_set_bit(n, ix),
 830                position,
 831                "nth_set_bit({:0128b}, {})",
 832                n,
 833                ix
 834            );
 835        }
 836    }
 837
 838    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 839    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 840        let mut str = String::new();
 841        let mut chars = RandomCharIter::new(rng);
 842        loop {
 843            let ch = chars.next().unwrap();
 844            if str.len() + ch.len_utf8() > len {
 845                break;
 846            }
 847            str.push(ch);
 848        }
 849        str
 850    }
 851
 852    #[gpui::test(iterations = 1000)]
 853    fn test_append_random_strings(mut rng: StdRng) {
 854        let len1 = rng.random_range(0..=MAX_BASE);
 855        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 856        let str1 = random_string_with_utf8_len(&mut rng, len1);
 857        let str2 = random_string_with_utf8_len(&mut rng, len2);
 858        let mut chunk1 = Chunk::new(&str1);
 859        let chunk2 = Chunk::new(&str2);
 860        let char_offsets = char_offsets_with_end(&str2);
 861        let start_index = rng.random_range(0..char_offsets.len());
 862        let start_offset = char_offsets[start_index];
 863        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 864        chunk1.append(chunk2.slice(start_offset..end_offset));
 865        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 866    }
 867
 868    /// Return the byte offsets for each character in a string.
 869    ///
 870    /// These are valid offsets to split the string.
 871    fn char_offsets(text: &str) -> Vec<usize> {
 872        text.char_indices().map(|(i, _c)| i).collect()
 873    }
 874
 875    /// Return the byte offsets for each character in a string, plus the offset
 876    /// past the end of the string.
 877    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 878        let mut v = char_offsets(text);
 879        v.push(text.len());
 880        v
 881    }
 882
 883    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 884        let mut offset = 0;
 885        let mut offset_utf16 = OffsetUtf16(0);
 886        let mut point = Point::zero();
 887        let mut point_utf16 = PointUtf16::zero();
 888
 889        log::info!("Verifying chunk {:?}", text);
 890        assert_eq!(chunk.offset_to_point(0), Point::zero());
 891
 892        let mut expected_tab_positions = Vec::new();
 893
 894        for (char_offset, c) in text.chars().enumerate() {
 895            let expected_point = chunk.offset_to_point(offset);
 896            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 897            assert_eq!(
 898                chunk.point_to_offset(point),
 899                offset,
 900                "mismatch at point {:?}",
 901                point
 902            );
 903            assert_eq!(
 904                chunk.offset_to_offset_utf16(offset),
 905                offset_utf16,
 906                "mismatch at offset {}",
 907                offset
 908            );
 909            assert_eq!(
 910                chunk.offset_utf16_to_offset(offset_utf16),
 911                offset,
 912                "mismatch at offset_utf16 {:?}",
 913                offset_utf16
 914            );
 915            assert_eq!(
 916                chunk.point_to_point_utf16(point),
 917                point_utf16,
 918                "mismatch at point {:?}",
 919                point
 920            );
 921            assert_eq!(
 922                chunk.point_utf16_to_offset(point_utf16, false),
 923                offset,
 924                "mismatch at point_utf16 {:?}",
 925                point_utf16
 926            );
 927            assert_eq!(
 928                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 929                point,
 930                "mismatch for unclipped_point_utf16_to_point at {:?}",
 931                point_utf16
 932            );
 933
 934            assert_eq!(
 935                chunk.clip_point(point, Bias::Left),
 936                point,
 937                "incorrect left clip at {:?}",
 938                point
 939            );
 940            assert_eq!(
 941                chunk.clip_point(point, Bias::Right),
 942                point,
 943                "incorrect right clip at {:?}",
 944                point
 945            );
 946
 947            for i in 1..c.len_utf8() {
 948                let test_point = Point::new(point.row, point.column + i as u32);
 949                assert_eq!(
 950                    chunk.clip_point(test_point, Bias::Left),
 951                    point,
 952                    "incorrect left clip within multi-byte char at {:?}",
 953                    test_point
 954                );
 955                assert_eq!(
 956                    chunk.clip_point(test_point, Bias::Right),
 957                    Point::new(point.row, point.column + c.len_utf8() as u32),
 958                    "incorrect right clip within multi-byte char at {:?}",
 959                    test_point
 960                );
 961            }
 962
 963            for i in 1..c.len_utf16() {
 964                let test_point = Unclipped(PointUtf16::new(
 965                    point_utf16.row,
 966                    point_utf16.column + i as u32,
 967                ));
 968                assert_eq!(
 969                    chunk.unclipped_point_utf16_to_point(test_point),
 970                    point,
 971                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 972                    test_point
 973                );
 974                assert_eq!(
 975                    chunk.clip_point_utf16(test_point, Bias::Left),
 976                    point_utf16,
 977                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 978                    test_point
 979                );
 980                assert_eq!(
 981                    chunk.clip_point_utf16(test_point, Bias::Right),
 982                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 983                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 984                    test_point
 985                );
 986
 987                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 988                assert_eq!(
 989                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 990                    offset_utf16,
 991                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 992                    test_offset
 993                );
 994                assert_eq!(
 995                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 996                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 997                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 998                    test_offset
 999                );
1000            }
1001
1002            if c == '\n' {
1003                point.row += 1;
1004                point.column = 0;
1005                point_utf16.row += 1;
1006                point_utf16.column = 0;
1007            } else {
1008                point.column += c.len_utf8() as u32;
1009                point_utf16.column += c.len_utf16() as u32;
1010            }
1011
1012            if c == '\t' {
1013                expected_tab_positions.push(TabPosition {
1014                    byte_offset: offset,
1015                    char_offset,
1016                });
1017            }
1018
1019            offset += c.len_utf8();
1020            offset_utf16.0 += c.len_utf16();
1021        }
1022
1023        let final_point = chunk.offset_to_point(offset);
1024        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
1025        assert_eq!(
1026            chunk.point_to_offset(point),
1027            offset,
1028            "mismatch at point {:?}",
1029            point
1030        );
1031        assert_eq!(
1032            chunk.offset_to_offset_utf16(offset),
1033            offset_utf16,
1034            "mismatch at offset {}",
1035            offset
1036        );
1037        assert_eq!(
1038            chunk.offset_utf16_to_offset(offset_utf16),
1039            offset,
1040            "mismatch at offset_utf16 {:?}",
1041            offset_utf16
1042        );
1043        assert_eq!(
1044            chunk.point_to_point_utf16(point),
1045            point_utf16,
1046            "mismatch at final point {:?}",
1047            point
1048        );
1049        assert_eq!(
1050            chunk.point_utf16_to_offset(point_utf16, false),
1051            offset,
1052            "mismatch at final point_utf16 {:?}",
1053            point_utf16
1054        );
1055        assert_eq!(
1056            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
1057            point,
1058            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
1059            point_utf16
1060        );
1061        assert_eq!(
1062            chunk.clip_point(point, Bias::Left),
1063            point,
1064            "incorrect left clip at final point {:?}",
1065            point
1066        );
1067        assert_eq!(
1068            chunk.clip_point(point, Bias::Right),
1069            point,
1070            "incorrect right clip at final point {:?}",
1071            point
1072        );
1073        assert_eq!(
1074            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1075            point_utf16,
1076            "incorrect left clip_point_utf16 at final point {:?}",
1077            point_utf16
1078        );
1079        assert_eq!(
1080            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1081            point_utf16,
1082            "incorrect right clip_point_utf16 at final point {:?}",
1083            point_utf16
1084        );
1085        assert_eq!(
1086            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1087            offset_utf16,
1088            "incorrect left clip_offset_utf16 at final offset {:?}",
1089            offset_utf16
1090        );
1091        assert_eq!(
1092            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1093            offset_utf16,
1094            "incorrect right clip_offset_utf16 at final offset {:?}",
1095            offset_utf16
1096        );
1097
1098        // Verify length methods
1099        assert_eq!(chunk.len(), text.len());
1100        assert_eq!(
1101            chunk.len_utf16().0,
1102            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1103        );
1104
1105        // Verify line counting
1106        let lines = chunk.lines();
1107        let mut newline_count = 0;
1108        let mut last_line_len = 0;
1109        for c in text.chars() {
1110            if c == '\n' {
1111                newline_count += 1;
1112                last_line_len = 0;
1113            } else {
1114                last_line_len += c.len_utf8() as u32;
1115            }
1116        }
1117        assert_eq!(lines, Point::new(newline_count, last_line_len));
1118
1119        // Verify first/last line chars
1120        if !text.is_empty() {
1121            let first_line = text.split('\n').next().unwrap();
1122            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1123
1124            let last_line = text.split('\n').next_back().unwrap();
1125            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1126            assert_eq!(
1127                chunk.last_line_len_utf16(),
1128                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1129            );
1130        }
1131
1132        // Verify longest row
1133        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1134        let mut max_chars = 0;
1135        let mut current_row = 0;
1136        let mut current_chars = 0;
1137        let mut max_row = 0;
1138
1139        for c in text.chars() {
1140            if c == '\n' {
1141                if current_chars > max_chars {
1142                    max_chars = current_chars;
1143                    max_row = current_row;
1144                }
1145                current_row += 1;
1146                current_chars = 0;
1147            } else {
1148                current_chars += 1;
1149            }
1150        }
1151
1152        if current_chars > max_chars {
1153            max_chars = current_chars;
1154            max_row = current_row;
1155        }
1156
1157        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1158        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1159    }
1160}