chunk.rs

   1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
   2use arrayvec::ArrayString;
   3use std::{cmp, ops::Range};
   4use sum_tree::Bias;
   5use unicode_segmentation::GraphemeCursor;
   6use util::debug_panic;
   7
   8#[cfg(not(all(test, not(rust_analyzer))))]
   9pub(crate) type Bitmap = u128;
  10#[cfg(all(test, not(rust_analyzer)))]
  11pub(crate) type Bitmap = u16;
  12
  13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
  14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
  15
  16#[derive(Clone, Debug, Default)]
  17pub struct Chunk {
  18    /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
  19    /// text.
  20    chars: Bitmap,
  21    /// The number of set bits is the number of UTF-16 code units it would take to represent the
  22    /// text.
  23    ///
  24    /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
  25    /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
  26    /// than two UTF-16 code units.)
  27    chars_utf16: Bitmap,
  28    /// If bit[i] is set, then the character at index i is an ascii newline.
  29    newlines: Bitmap,
  30    /// If bit[i] is set, then the character at index i is an ascii tab.
  31    tabs: Bitmap,
  32    pub text: ArrayString<MAX_BASE>,
  33}
  34
  35#[inline(always)]
  36const fn saturating_shl_mask(offset: u32) -> Bitmap {
  37    (1 as Bitmap).unbounded_shl(offset).wrapping_sub(1)
  38}
  39
  40#[inline(always)]
  41const fn saturating_shr_mask(offset: u32) -> Bitmap {
  42    !Bitmap::MAX.unbounded_shr(offset)
  43}
  44
  45impl Chunk {
  46    pub const MASK_BITS: usize = Bitmap::BITS as usize;
  47
  48    #[inline(always)]
  49    pub fn new(text: &str) -> Self {
  50        let mut this = Chunk::default();
  51        this.push_str(text);
  52        this
  53    }
  54
  55    #[inline(always)]
  56    pub fn push_str(&mut self, text: &str) {
  57        for (char_ix, c) in text.char_indices() {
  58            let ix = self.text.len() + char_ix;
  59            self.chars |= 1 << ix;
  60            self.chars_utf16 |= 1 << ix;
  61            self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
  62            self.newlines |= ((c == '\n') as Bitmap) << ix;
  63            self.tabs |= ((c == '\t') as Bitmap) << ix;
  64        }
  65        self.text.push_str(text);
  66    }
  67
  68    #[inline(always)]
  69    pub fn append(&mut self, slice: ChunkSlice) {
  70        if slice.is_empty() {
  71            return;
  72        };
  73
  74        let base_ix = self.text.len();
  75        self.chars |= slice.chars << base_ix;
  76        self.chars_utf16 |= slice.chars_utf16 << base_ix;
  77        self.newlines |= slice.newlines << base_ix;
  78        self.tabs |= slice.tabs << base_ix;
  79        self.text.push_str(slice.text);
  80    }
  81
  82    #[inline(always)]
  83    pub fn as_slice(&self) -> ChunkSlice<'_> {
  84        ChunkSlice {
  85            chars: self.chars,
  86            chars_utf16: self.chars_utf16,
  87            newlines: self.newlines,
  88            tabs: self.tabs,
  89            text: &self.text,
  90        }
  91    }
  92
  93    #[inline(always)]
  94    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
  95        self.as_slice().slice(range)
  96    }
  97
  98    #[inline(always)]
  99    pub fn chars(&self) -> Bitmap {
 100        self.chars
 101    }
 102
 103    pub fn tabs(&self) -> Bitmap {
 104        self.tabs
 105    }
 106
 107    #[inline(always)]
 108    pub fn is_char_boundary(&self, offset: usize) -> bool {
 109        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 110    }
 111
 112    pub fn floor_char_boundary(&self, index: usize) -> usize {
 113        if index >= self.text.len() {
 114            self.text.len()
 115        } else {
 116            let mut i = index;
 117            while i > 0 {
 118                if util::is_utf8_char_boundary(self.text.as_bytes()[i]) {
 119                    break;
 120                }
 121                i -= 1;
 122            }
 123
 124            i
 125        }
 126    }
 127
 128    #[track_caller]
 129    #[inline(always)]
 130    pub fn assert_char_boundary(&self, offset: usize) {
 131        if self.is_char_boundary(offset) {
 132            return;
 133        }
 134        panic_char_boundary(self, offset);
 135
 136        #[cold]
 137        #[inline(never)]
 138        #[track_caller]
 139        fn panic_char_boundary(chunk: &Chunk, offset: usize) {
 140            if offset > chunk.text.len() {
 141                panic!(
 142                    "byte index {} is out of bounds of `{:?}` (length: {})",
 143                    offset,
 144                    chunk.text,
 145                    chunk.text.len()
 146                );
 147            }
 148            // find the character
 149            let char_start = chunk.floor_char_boundary(offset);
 150            // `char_start` must be less than len and a char boundary
 151            let ch = chunk
 152                .text
 153                .get(char_start..)
 154                .unwrap()
 155                .chars()
 156                .next()
 157                .unwrap();
 158            let char_range = char_start..char_start + ch.len_utf8();
 159            panic!(
 160                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 161                offset, ch, char_range,
 162            );
 163        }
 164    }
 165}
 166
 167#[derive(Clone, Copy, Debug)]
 168pub struct ChunkSlice<'a> {
 169    chars: Bitmap,
 170    chars_utf16: Bitmap,
 171    newlines: Bitmap,
 172    tabs: Bitmap,
 173    text: &'a str,
 174}
 175
 176impl Into<Chunk> for ChunkSlice<'_> {
 177    fn into(self) -> Chunk {
 178        Chunk {
 179            chars: self.chars,
 180            chars_utf16: self.chars_utf16,
 181            newlines: self.newlines,
 182            tabs: self.tabs,
 183            text: self.text.try_into().unwrap(),
 184        }
 185    }
 186}
 187
 188impl<'a> ChunkSlice<'a> {
 189    #[inline(always)]
 190    pub fn is_empty(&self) -> bool {
 191        self.text.is_empty()
 192    }
 193
 194    #[inline(always)]
 195    pub fn is_char_boundary(&self, offset: usize) -> bool {
 196        (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
 197    }
 198
 199    #[inline(always)]
 200    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
 201        if mid == MAX_BASE {
 202            let left = self;
 203            let right = ChunkSlice {
 204                chars: 0,
 205                chars_utf16: 0,
 206                newlines: 0,
 207                tabs: 0,
 208                text: "",
 209            };
 210            (left, right)
 211        } else {
 212            let mask = ((1 as Bitmap) << mid) - 1;
 213            let (left_text, right_text) = self.text.split_at(mid);
 214            let left = ChunkSlice {
 215                chars: self.chars & mask,
 216                chars_utf16: self.chars_utf16 & mask,
 217                newlines: self.newlines & mask,
 218                tabs: self.tabs & mask,
 219                text: left_text,
 220            };
 221            let right = ChunkSlice {
 222                chars: self.chars >> mid,
 223                chars_utf16: self.chars_utf16 >> mid,
 224                newlines: self.newlines >> mid,
 225                tabs: self.tabs >> mid,
 226                text: right_text,
 227            };
 228            (left, right)
 229        }
 230    }
 231
 232    #[inline(always)]
 233    pub fn slice(self, range: Range<usize>) -> Self {
 234        let mask = (1 as Bitmap)
 235            .unbounded_shl(range.end as u32)
 236            .wrapping_sub(1);
 237        if range.start == MAX_BASE {
 238            Self {
 239                chars: 0,
 240                chars_utf16: 0,
 241                newlines: 0,
 242                tabs: 0,
 243                text: "",
 244            }
 245        } else {
 246            self.assert_char_boundary(range.start);
 247            self.assert_char_boundary(range.end);
 248            Self {
 249                chars: (self.chars & mask) >> range.start,
 250                chars_utf16: (self.chars_utf16 & mask) >> range.start,
 251                newlines: (self.newlines & mask) >> range.start,
 252                tabs: (self.tabs & mask) >> range.start,
 253                text: &self.text[range],
 254            }
 255        }
 256    }
 257
 258    #[inline(always)]
 259    pub fn text_summary(&self) -> TextSummary {
 260        let mut chars = 0;
 261        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
 262        TextSummary {
 263            len: self.len(),
 264            chars,
 265            len_utf16: self.len_utf16(),
 266            lines: self.lines(),
 267            first_line_chars: self.first_line_chars(),
 268            last_line_chars: self.last_line_chars(),
 269            last_line_len_utf16: self.last_line_len_utf16(),
 270            longest_row,
 271            longest_row_chars,
 272        }
 273    }
 274
 275    /// Get length in bytes
 276    #[inline(always)]
 277    pub fn len(&self) -> usize {
 278        self.text.len()
 279    }
 280
 281    /// Get length in UTF-16 code units
 282    #[inline(always)]
 283    pub fn len_utf16(&self) -> OffsetUtf16 {
 284        OffsetUtf16(self.chars_utf16.count_ones() as usize)
 285    }
 286
 287    /// Get point representing number of lines and length of last line
 288    #[inline(always)]
 289    pub fn lines(&self) -> Point {
 290        let row = self.newlines.count_ones();
 291        let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
 292        Point::new(row, column)
 293    }
 294
 295    /// Get number of chars in first line
 296    #[inline(always)]
 297    pub fn first_line_chars(&self) -> u32 {
 298        (self.chars & saturating_shl_mask(self.newlines.trailing_zeros())).count_ones()
 299    }
 300
 301    /// Get number of chars in last line
 302    #[inline(always)]
 303    pub fn last_line_chars(&self) -> u32 {
 304        (self.chars & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 305    }
 306
 307    /// Get number of UTF-16 code units in last line
 308    #[inline(always)]
 309    pub fn last_line_len_utf16(&self) -> u32 {
 310        (self.chars_utf16 & saturating_shr_mask(self.newlines.leading_zeros())).count_ones()
 311    }
 312
 313    /// Get the longest row in the chunk and its length in characters.
 314    /// Calculate the total number of characters in the chunk along the way.
 315    #[inline(always)]
 316    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
 317        let mut chars = self.chars;
 318        let mut newlines = self.newlines;
 319        *total_chars = 0;
 320        let mut row = 0;
 321        let mut longest_row = 0;
 322        let mut longest_row_chars = 0;
 323        while newlines > 0 {
 324            let newline_ix = newlines.trailing_zeros();
 325            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
 326            *total_chars += usize::from(row_chars);
 327            if row_chars > longest_row_chars {
 328                longest_row = row;
 329                longest_row_chars = row_chars;
 330            }
 331
 332            newlines >>= newline_ix;
 333            newlines >>= 1;
 334            chars >>= newline_ix;
 335            chars >>= 1;
 336            row += 1;
 337            *total_chars += 1;
 338        }
 339
 340        let row_chars = chars.count_ones() as u8;
 341        *total_chars += usize::from(row_chars);
 342        if row_chars > longest_row_chars {
 343            (row, row_chars as u32)
 344        } else {
 345            (longest_row, longest_row_chars as u32)
 346        }
 347    }
 348
 349    #[inline(always)]
 350    pub fn offset_to_point(&self, offset: usize) -> Point {
 351        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 352        let row = (self.newlines & mask).count_ones();
 353        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 354        let column = (offset - newline_ix as usize) as u32;
 355        Point::new(row, column)
 356    }
 357
 358    #[inline(always)]
 359    pub fn point_to_offset(&self, point: Point) -> usize {
 360        if point.row > self.lines().row {
 361            debug_panic!(
 362                "point {:?} extends beyond rows for string {:?}",
 363                point,
 364                self.text
 365            );
 366            return self.len();
 367        }
 368
 369        let row_offset_range = self.offset_range_for_row(point.row);
 370        if point.column > row_offset_range.len() as u32 {
 371            debug_panic!(
 372                "point {:?} extends beyond row for string {:?}",
 373                point,
 374                self.text
 375            );
 376            row_offset_range.end
 377        } else {
 378            row_offset_range.start + point.column as usize
 379        }
 380    }
 381
 382    #[track_caller]
 383    #[inline(always)]
 384    pub fn assert_char_boundary(&self, offset: usize) {
 385        if self.is_char_boundary(offset) {
 386            return;
 387        }
 388        panic_char_boundary(self, offset);
 389
 390        #[cold]
 391        #[inline(never)]
 392        fn panic_char_boundary(chunk: &ChunkSlice, offset: usize) {
 393            if offset > chunk.text.len() {
 394                panic!(
 395                    "byte index {} is out of bounds of `{:?}` (length: {})",
 396                    offset,
 397                    chunk.text,
 398                    chunk.text.len()
 399                );
 400            }
 401            // find the character
 402            let char_start = chunk.floor_char_boundary(offset);
 403            // `char_start` must be less than len and a char boundary
 404            let ch = chunk
 405                .text
 406                .get(char_start..)
 407                .unwrap()
 408                .chars()
 409                .next()
 410                .unwrap();
 411            let char_range = char_start..char_start + ch.len_utf8();
 412            panic!(
 413                "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
 414                offset, ch, char_range,
 415            );
 416        }
 417    }
 418
 419    pub fn floor_char_boundary(&self, index: usize) -> usize {
 420        self.text.floor_char_boundary(index)
 421    }
 422
 423    #[inline(always)]
 424    pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
 425        if point.row > self.lines().row {
 426            debug_panic!(
 427                "point {:?} extends beyond rows for string {:?}",
 428                point,
 429                self.text
 430            );
 431            return self.len_utf16();
 432        }
 433        self.offset_to_offset_utf16(self.point_to_offset(point))
 434    }
 435
 436    #[inline(always)]
 437    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
 438        let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
 439        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
 440    }
 441
 442    #[inline(always)]
 443    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
 444        if target.0 == 0 {
 445            0
 446        } else {
 447            #[cfg(not(test))]
 448            let chars_utf16 = self.chars_utf16;
 449            #[cfg(test)]
 450            let chars_utf16 = self.chars_utf16 as u128;
 451            let ix = nth_set_bit(chars_utf16, target.0) + 1;
 452            if ix == MAX_BASE {
 453                MAX_BASE
 454            } else {
 455                let utf8_additional_len = cmp::min(
 456                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
 457                    self.text.len() - ix,
 458                );
 459                ix + utf8_additional_len
 460            }
 461        }
 462    }
 463
 464    #[inline(always)]
 465    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
 466        let mask = saturating_shl_mask(offset as u32);
 467        let row = (self.newlines & saturating_shl_mask(offset as u32)).count_ones();
 468        let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
 469        let column = if newline_ix as usize == MAX_BASE {
 470            0
 471        } else {
 472            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
 473        };
 474        PointUtf16::new(row, column)
 475    }
 476
 477    #[inline(always)]
 478    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
 479        self.offset_to_point_utf16(self.point_to_offset(point))
 480    }
 481
 482    #[inline(always)]
 483    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
 484        let lines = self.lines();
 485        if point.row > lines.row {
 486            if !clip {
 487                debug_panic!(
 488                    "point {:?} is beyond this chunk's extent {:?}",
 489                    point,
 490                    self.text
 491                );
 492            }
 493            return self.len();
 494        }
 495
 496        let row_offset_range = self.offset_range_for_row(point.row);
 497        let line = self.slice(row_offset_range.clone());
 498        if point.column > line.last_line_len_utf16() {
 499            if !clip {
 500                debug_panic!(
 501                    "point {:?} is beyond the end of the line in chunk {:?}",
 502                    point,
 503                    self.text
 504                );
 505            }
 506            return line.len();
 507        }
 508
 509        let mut offset = row_offset_range.start;
 510        if point.column > 0 {
 511            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
 512            if !self.text.is_char_boundary(offset) {
 513                offset -= 1;
 514                while !self.text.is_char_boundary(offset) {
 515                    offset -= 1;
 516                }
 517                if !clip {
 518                    debug_panic!(
 519                        "point {:?} is within character in chunk {:?}",
 520                        point,
 521                        self.text,
 522                    );
 523                }
 524            }
 525        }
 526        offset
 527    }
 528
 529    #[inline(always)]
 530    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
 531        let max_point = self.lines();
 532        if point.0.row > max_point.row {
 533            return max_point;
 534        }
 535
 536        let row_offset_range = self.offset_range_for_row(point.0.row);
 537        let line = self.slice(row_offset_range);
 538        if point.0.column == 0 {
 539            Point::new(point.0.row, 0)
 540        } else if point.0.column >= line.len_utf16().0 as u32 {
 541            Point::new(point.0.row, line.len() as u32)
 542        } else {
 543            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
 544            while !line.text.is_char_boundary(column) {
 545                column -= 1;
 546            }
 547            Point::new(point.0.row, column as u32)
 548        }
 549    }
 550
 551    #[inline(always)]
 552    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
 553        let max_point = self.lines();
 554        if point.row > max_point.row {
 555            return max_point;
 556        }
 557
 558        let line = self.slice(self.offset_range_for_row(point.row));
 559        if point.column == 0 {
 560            point
 561        } else if point.column >= line.len() as u32 {
 562            Point::new(point.row, line.len() as u32)
 563        } else {
 564            let mut column = point.column as usize;
 565            let bytes = line.text.as_bytes();
 566            if bytes[column - 1] < 128 && bytes[column] < 128 {
 567                return Point::new(point.row, column as u32);
 568            }
 569
 570            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
 571            loop {
 572                if line.is_char_boundary(column)
 573                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
 574                {
 575                    break;
 576                }
 577
 578                match bias {
 579                    Bias::Left => column -= 1,
 580                    Bias::Right => column += 1,
 581                }
 582                grapheme_cursor.set_cursor(column);
 583            }
 584            Point::new(point.row, column as u32)
 585        }
 586    }
 587
 588    #[inline(always)]
 589    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
 590        let max_point = self.lines();
 591        if point.0.row > max_point.row {
 592            PointUtf16::new(max_point.row, self.last_line_len_utf16())
 593        } else {
 594            let line = self.slice(self.offset_range_for_row(point.0.row));
 595            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
 596            PointUtf16::new(point.0.row, column.0 as u32)
 597        }
 598    }
 599
 600    #[inline(always)]
 601    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
 602        if target == OffsetUtf16::default() {
 603            OffsetUtf16::default()
 604        } else if target >= self.len_utf16() {
 605            self.len_utf16()
 606        } else {
 607            let mut offset = self.offset_utf16_to_offset(target);
 608            while !self.text.is_char_boundary(offset) {
 609                if bias == Bias::Left {
 610                    offset -= 1;
 611                } else {
 612                    offset += 1;
 613                }
 614            }
 615            self.offset_to_offset_utf16(offset)
 616        }
 617    }
 618
 619    #[inline(always)]
 620    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
 621        let row_start = if row > 0 {
 622            #[cfg(not(test))]
 623            let newlines = self.newlines;
 624            #[cfg(test)]
 625            let newlines = self.newlines as u128;
 626            nth_set_bit(newlines, row as usize) + 1
 627        } else {
 628            0
 629        };
 630        let row_len = if row_start == MAX_BASE {
 631            0
 632        } else {
 633            cmp::min(
 634                (self.newlines >> row_start).trailing_zeros(),
 635                (self.text.len() - row_start) as u32,
 636            )
 637        };
 638        row_start..row_start + row_len as usize
 639    }
 640
 641    #[inline(always)]
 642    pub fn tabs(&self) -> Tabs {
 643        Tabs {
 644            tabs: self.tabs,
 645            chars: self.chars,
 646        }
 647    }
 648}
 649
 650pub struct Tabs {
 651    tabs: Bitmap,
 652    chars: Bitmap,
 653}
 654
 655#[derive(Debug, PartialEq, Eq)]
 656pub struct TabPosition {
 657    pub byte_offset: usize,
 658    pub char_offset: usize,
 659}
 660
 661impl Iterator for Tabs {
 662    type Item = TabPosition;
 663
 664    fn next(&mut self) -> Option<Self::Item> {
 665        if self.tabs == 0 {
 666            return None;
 667        }
 668
 669        let tab_offset = self.tabs.trailing_zeros() as usize;
 670        let chars_mask = (1 << tab_offset) - 1;
 671        let char_offset = (self.chars & chars_mask).count_ones() as usize;
 672
 673        // Since tabs are 1 byte the tab offset is the same as the byte offset
 674        let position = TabPosition {
 675            byte_offset: tab_offset,
 676            char_offset,
 677        };
 678        // Remove the tab we've just seen
 679        self.tabs ^= 1 << tab_offset;
 680
 681        Some(position)
 682    }
 683}
 684
 685/// Finds the n-th bit that is set to 1.
 686#[inline(always)]
 687fn nth_set_bit(v: u128, n: usize) -> usize {
 688    let low = v as u64;
 689    let high = (v >> 64) as u64;
 690
 691    let low_count = low.count_ones() as usize;
 692    if n > low_count {
 693        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
 694    } else {
 695        nth_set_bit_u64(low, n as u64) as usize
 696    }
 697}
 698
 699#[inline(always)]
 700fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
 701    let v = v.reverse_bits();
 702    let mut s: u64 = 64;
 703
 704    // Parallel bit count intermediates
 705    let a = v - ((v >> 1) & (u64::MAX / 3));
 706    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
 707    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
 708    let d = (c + (c >> 8)) & (u64::MAX / 0x101);
 709
 710    // Branchless select
 711    let t = (d >> 32) + (d >> 48);
 712    s -= (t.wrapping_sub(n) & 256) >> 3;
 713    n -= t & (t.wrapping_sub(n) >> 8);
 714
 715    let t = (d >> (s - 16)) & 0xff;
 716    s -= (t.wrapping_sub(n) & 256) >> 4;
 717    n -= t & (t.wrapping_sub(n) >> 8);
 718
 719    let t = (c >> (s - 8)) & 0xf;
 720    s -= (t.wrapping_sub(n) & 256) >> 5;
 721    n -= t & (t.wrapping_sub(n) >> 8);
 722
 723    let t = (b >> (s - 4)) & 0x7;
 724    s -= (t.wrapping_sub(n) & 256) >> 6;
 725    n -= t & (t.wrapping_sub(n) >> 8);
 726
 727    let t = (a >> (s - 2)) & 0x3;
 728    s -= (t.wrapping_sub(n) & 256) >> 7;
 729    n -= t & (t.wrapping_sub(n) >> 8);
 730
 731    let t = (v >> (s - 1)) & 0x1;
 732    s -= (t.wrapping_sub(n) & 256) >> 8;
 733
 734    65 - s - 1
 735}
 736
 737#[cfg(test)]
 738mod tests {
 739    use super::*;
 740    use rand::prelude::*;
 741    use util::RandomCharIter;
 742
 743    #[gpui::test(iterations = 100)]
 744    fn test_random_chunks(mut rng: StdRng) {
 745        let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
 746        log::info!("Chunk: {:?}", text);
 747        let chunk = Chunk::new(&text);
 748        verify_chunk(chunk.as_slice(), &text);
 749
 750        // Verify Chunk::chars() bitmap
 751        let expected_chars = char_offsets(&text)
 752            .into_iter()
 753            .inspect(|i| assert!(*i < MAX_BASE))
 754            .fold(0 as Bitmap, |acc, i| acc | (1 << i));
 755        assert_eq!(chunk.chars(), expected_chars);
 756
 757        for _ in 0..10 {
 758            let mut start = rng.random_range(0..=chunk.text.len());
 759            let mut end = rng.random_range(start..=chunk.text.len());
 760            while !chunk.text.is_char_boundary(start) {
 761                start -= 1;
 762            }
 763            while !chunk.text.is_char_boundary(end) {
 764                end -= 1;
 765            }
 766            let range = start..end;
 767            log::info!("Range: {:?}", range);
 768            let text_slice = &text[range.clone()];
 769            let chunk_slice = chunk.slice(range);
 770            verify_chunk(chunk_slice, text_slice);
 771        }
 772    }
 773
 774    #[gpui::test(iterations = 100)]
 775    fn test_split_chunk_slice(mut rng: StdRng) {
 776        let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
 777        let chunk = Chunk::new(text);
 778        let offset = char_offsets_with_end(text)
 779            .into_iter()
 780            .choose(&mut rng)
 781            .unwrap();
 782        let (a, b) = chunk.as_slice().split_at(offset);
 783        let (a_str, b_str) = text.split_at(offset);
 784        verify_chunk(a, a_str);
 785        verify_chunk(b, b_str);
 786    }
 787
 788    #[gpui::test(iterations = 1000)]
 789    fn test_nth_set_bit_random(mut rng: StdRng) {
 790        let set_count = rng.random_range(0..=128);
 791        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
 792        set_bits.sort();
 793        let mut n = 0;
 794        for ix in set_bits.iter().copied() {
 795            n |= 1 << ix;
 796        }
 797
 798        for (mut ix, position) in set_bits.into_iter().enumerate() {
 799            ix += 1;
 800            assert_eq!(
 801                nth_set_bit(n, ix),
 802                position,
 803                "nth_set_bit({:0128b}, {})",
 804                n,
 805                ix
 806            );
 807        }
 808    }
 809
 810    /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
 811    fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
 812        let mut str = String::new();
 813        let mut chars = RandomCharIter::new(rng);
 814        loop {
 815            let ch = chars.next().unwrap();
 816            if str.len() + ch.len_utf8() > len {
 817                break;
 818            }
 819            str.push(ch);
 820        }
 821        str
 822    }
 823
 824    #[gpui::test(iterations = 1000)]
 825    fn test_append_random_strings(mut rng: StdRng) {
 826        let len1 = rng.random_range(0..=MAX_BASE);
 827        let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
 828        let str1 = random_string_with_utf8_len(&mut rng, len1);
 829        let str2 = random_string_with_utf8_len(&mut rng, len2);
 830        let mut chunk1 = Chunk::new(&str1);
 831        let chunk2 = Chunk::new(&str2);
 832        let char_offsets = char_offsets_with_end(&str2);
 833        let start_index = rng.random_range(0..char_offsets.len());
 834        let start_offset = char_offsets[start_index];
 835        let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
 836        chunk1.append(chunk2.slice(start_offset..end_offset));
 837        verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
 838    }
 839
 840    /// Return the byte offsets for each character in a string.
 841    ///
 842    /// These are valid offsets to split the string.
 843    fn char_offsets(text: &str) -> Vec<usize> {
 844        text.char_indices().map(|(i, _c)| i).collect()
 845    }
 846
 847    /// Return the byte offsets for each character in a string, plus the offset
 848    /// past the end of the string.
 849    fn char_offsets_with_end(text: &str) -> Vec<usize> {
 850        let mut v = char_offsets(text);
 851        v.push(text.len());
 852        v
 853    }
 854
 855    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
 856        let mut offset = 0;
 857        let mut offset_utf16 = OffsetUtf16(0);
 858        let mut point = Point::zero();
 859        let mut point_utf16 = PointUtf16::zero();
 860
 861        log::info!("Verifying chunk {:?}", text);
 862        assert_eq!(chunk.offset_to_point(0), Point::zero());
 863
 864        let mut expected_tab_positions = Vec::new();
 865
 866        for (char_offset, c) in text.chars().enumerate() {
 867            let expected_point = chunk.offset_to_point(offset);
 868            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
 869            assert_eq!(
 870                chunk.point_to_offset(point),
 871                offset,
 872                "mismatch at point {:?}",
 873                point
 874            );
 875            assert_eq!(
 876                chunk.offset_to_offset_utf16(offset),
 877                offset_utf16,
 878                "mismatch at offset {}",
 879                offset
 880            );
 881            assert_eq!(
 882                chunk.offset_utf16_to_offset(offset_utf16),
 883                offset,
 884                "mismatch at offset_utf16 {:?}",
 885                offset_utf16
 886            );
 887            assert_eq!(
 888                chunk.point_to_point_utf16(point),
 889                point_utf16,
 890                "mismatch at point {:?}",
 891                point
 892            );
 893            assert_eq!(
 894                chunk.point_utf16_to_offset(point_utf16, false),
 895                offset,
 896                "mismatch at point_utf16 {:?}",
 897                point_utf16
 898            );
 899            assert_eq!(
 900                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
 901                point,
 902                "mismatch for unclipped_point_utf16_to_point at {:?}",
 903                point_utf16
 904            );
 905
 906            assert_eq!(
 907                chunk.clip_point(point, Bias::Left),
 908                point,
 909                "incorrect left clip at {:?}",
 910                point
 911            );
 912            assert_eq!(
 913                chunk.clip_point(point, Bias::Right),
 914                point,
 915                "incorrect right clip at {:?}",
 916                point
 917            );
 918
 919            for i in 1..c.len_utf8() {
 920                let test_point = Point::new(point.row, point.column + i as u32);
 921                assert_eq!(
 922                    chunk.clip_point(test_point, Bias::Left),
 923                    point,
 924                    "incorrect left clip within multi-byte char at {:?}",
 925                    test_point
 926                );
 927                assert_eq!(
 928                    chunk.clip_point(test_point, Bias::Right),
 929                    Point::new(point.row, point.column + c.len_utf8() as u32),
 930                    "incorrect right clip within multi-byte char at {:?}",
 931                    test_point
 932                );
 933            }
 934
 935            for i in 1..c.len_utf16() {
 936                let test_point = Unclipped(PointUtf16::new(
 937                    point_utf16.row,
 938                    point_utf16.column + i as u32,
 939                ));
 940                assert_eq!(
 941                    chunk.unclipped_point_utf16_to_point(test_point),
 942                    point,
 943                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
 944                    test_point
 945                );
 946                assert_eq!(
 947                    chunk.clip_point_utf16(test_point, Bias::Left),
 948                    point_utf16,
 949                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
 950                    test_point
 951                );
 952                assert_eq!(
 953                    chunk.clip_point_utf16(test_point, Bias::Right),
 954                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
 955                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
 956                    test_point
 957                );
 958
 959                let test_offset = OffsetUtf16(offset_utf16.0 + i);
 960                assert_eq!(
 961                    chunk.clip_offset_utf16(test_offset, Bias::Left),
 962                    offset_utf16,
 963                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
 964                    test_offset
 965                );
 966                assert_eq!(
 967                    chunk.clip_offset_utf16(test_offset, Bias::Right),
 968                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
 969                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
 970                    test_offset
 971                );
 972            }
 973
 974            if c == '\n' {
 975                point.row += 1;
 976                point.column = 0;
 977                point_utf16.row += 1;
 978                point_utf16.column = 0;
 979            } else {
 980                point.column += c.len_utf8() as u32;
 981                point_utf16.column += c.len_utf16() as u32;
 982            }
 983
 984            if c == '\t' {
 985                expected_tab_positions.push(TabPosition {
 986                    byte_offset: offset,
 987                    char_offset,
 988                });
 989            }
 990
 991            offset += c.len_utf8();
 992            offset_utf16.0 += c.len_utf16();
 993        }
 994
 995        let final_point = chunk.offset_to_point(offset);
 996        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
 997        assert_eq!(
 998            chunk.point_to_offset(point),
 999            offset,
1000            "mismatch at point {:?}",
1001            point
1002        );
1003        assert_eq!(
1004            chunk.offset_to_offset_utf16(offset),
1005            offset_utf16,
1006            "mismatch at offset {}",
1007            offset
1008        );
1009        assert_eq!(
1010            chunk.offset_utf16_to_offset(offset_utf16),
1011            offset,
1012            "mismatch at offset_utf16 {:?}",
1013            offset_utf16
1014        );
1015        assert_eq!(
1016            chunk.point_to_point_utf16(point),
1017            point_utf16,
1018            "mismatch at final point {:?}",
1019            point
1020        );
1021        assert_eq!(
1022            chunk.point_utf16_to_offset(point_utf16, false),
1023            offset,
1024            "mismatch at final point_utf16 {:?}",
1025            point_utf16
1026        );
1027        assert_eq!(
1028            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
1029            point,
1030            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
1031            point_utf16
1032        );
1033        assert_eq!(
1034            chunk.clip_point(point, Bias::Left),
1035            point,
1036            "incorrect left clip at final point {:?}",
1037            point
1038        );
1039        assert_eq!(
1040            chunk.clip_point(point, Bias::Right),
1041            point,
1042            "incorrect right clip at final point {:?}",
1043            point
1044        );
1045        assert_eq!(
1046            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1047            point_utf16,
1048            "incorrect left clip_point_utf16 at final point {:?}",
1049            point_utf16
1050        );
1051        assert_eq!(
1052            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1053            point_utf16,
1054            "incorrect right clip_point_utf16 at final point {:?}",
1055            point_utf16
1056        );
1057        assert_eq!(
1058            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1059            offset_utf16,
1060            "incorrect left clip_offset_utf16 at final offset {:?}",
1061            offset_utf16
1062        );
1063        assert_eq!(
1064            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1065            offset_utf16,
1066            "incorrect right clip_offset_utf16 at final offset {:?}",
1067            offset_utf16
1068        );
1069
1070        // Verify length methods
1071        assert_eq!(chunk.len(), text.len());
1072        assert_eq!(
1073            chunk.len_utf16().0,
1074            text.chars().map(|c| c.len_utf16()).sum::<usize>()
1075        );
1076
1077        // Verify line counting
1078        let lines = chunk.lines();
1079        let mut newline_count = 0;
1080        let mut last_line_len = 0;
1081        for c in text.chars() {
1082            if c == '\n' {
1083                newline_count += 1;
1084                last_line_len = 0;
1085            } else {
1086                last_line_len += c.len_utf8() as u32;
1087            }
1088        }
1089        assert_eq!(lines, Point::new(newline_count, last_line_len));
1090
1091        // Verify first/last line chars
1092        if !text.is_empty() {
1093            let first_line = text.split('\n').next().unwrap();
1094            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1095
1096            let last_line = text.split('\n').next_back().unwrap();
1097            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1098            assert_eq!(
1099                chunk.last_line_len_utf16(),
1100                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1101            );
1102        }
1103
1104        // Verify longest row
1105        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1106        let mut max_chars = 0;
1107        let mut current_row = 0;
1108        let mut current_chars = 0;
1109        let mut max_row = 0;
1110
1111        for c in text.chars() {
1112            if c == '\n' {
1113                if current_chars > max_chars {
1114                    max_chars = current_chars;
1115                    max_row = current_row;
1116                }
1117                current_row += 1;
1118                current_chars = 0;
1119            } else {
1120                current_chars += 1;
1121            }
1122        }
1123
1124        if current_chars > max_chars {
1125            max_chars = current_chars;
1126            max_row = current_row;
1127        }
1128
1129        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1130        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1131    }
1132}