chunk.rs

  1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
  2use arrayvec::ArrayString;
  3use std::{cmp, ops::Range};
  4use sum_tree::Bias;
  5use unicode_segmentation::GraphemeCursor;
  6use util::debug_panic;
  7
/// Base chunk size: 6 when compiled under `cfg!(test)`, 64 otherwise.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Twice `MIN_BASE`; used as the byte capacity of `Chunk::text`.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
 10
/// A fixed-capacity piece of UTF-8 text together with bitmaps describing it.
///
/// Bit `i` of every bitmap refers to byte offset `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// One bit at the byte offset where each `char` starts.
    chars: u128,
    /// One bit at the start of each `char`, plus the following bit for chars
    /// that occupy two UTF-16 code units.
    chars_utf16: u128,
    /// One bit at the byte offset of each `'\n'`.
    newlines: u128,
    /// One bit at the byte offset of each `'\t'`.
    pub tabs: u128,
    /// The text itself, stored inline with a capacity of `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
 19
 20impl Chunk {
 21    #[inline(always)]
 22    pub fn new(text: &str) -> Self {
 23        let mut this = Chunk::default();
 24        this.push_str(text);
 25        this
 26    }
 27
 28    #[inline(always)]
 29    pub fn push_str(&mut self, text: &str) {
 30        for (char_ix, c) in text.char_indices() {
 31            let ix = self.text.len() + char_ix;
 32            self.chars |= 1 << ix;
 33            self.chars_utf16 |= 1 << ix;
 34            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
 35            self.newlines |= ((c == '\n') as u128) << ix;
 36            self.tabs |= ((c == '\t') as u128) << ix;
 37        }
 38        self.text.push_str(text);
 39    }
 40
 41    #[inline(always)]
 42    pub fn append(&mut self, slice: ChunkSlice) {
 43        if slice.is_empty() {
 44            return;
 45        };
 46
 47        let base_ix = self.text.len();
 48        self.chars |= slice.chars << base_ix;
 49        self.chars_utf16 |= slice.chars_utf16 << base_ix;
 50        self.newlines |= slice.newlines << base_ix;
 51        self.tabs |= slice.tabs << base_ix;
 52        self.text.push_str(slice.text);
 53    }
 54
 55    #[inline(always)]
 56    pub fn as_slice(&self) -> ChunkSlice<'_> {
 57        ChunkSlice {
 58            chars: self.chars,
 59            chars_utf16: self.chars_utf16,
 60            newlines: self.newlines,
 61            tabs: self.tabs,
 62            text: &self.text,
 63        }
 64    }
 65
 66    #[inline(always)]
 67    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
 68        self.as_slice().slice(range)
 69    }
 70
 71    #[inline(always)]
 72    pub fn chars(&self) -> u128 {
 73        self.chars
 74    }
 75}
 76
/// A cheap, copyable view of some text plus the bitmaps describing it.
///
/// Bit `i` of every bitmap refers to byte offset `i` of `text` (that is, the
/// bitmaps are relative to the start of the slice, not of the owning chunk).
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// One bit at the byte offset where each `char` starts.
    chars: u128,
    /// One bit per UTF-16 code unit, placed at the char's byte offset (and the
    /// following bit for two-unit chars).
    chars_utf16: u128,
    /// One bit at the byte offset of each `'\n'`.
    newlines: u128,
    /// One bit at the byte offset of each `'\t'`.
    tabs: u128,
    /// The borrowed text.
    text: &'a str,
}
 85
 86impl Into<Chunk> for ChunkSlice<'_> {
 87    fn into(self) -> Chunk {
 88        Chunk {
 89            chars: self.chars,
 90            chars_utf16: self.chars_utf16,
 91            newlines: self.newlines,
 92            tabs: self.tabs,
 93            text: self.text.try_into().unwrap(),
 94        }
 95    }
 96}
 97
 98impl<'a> ChunkSlice<'a> {
 99    #[inline(always)]
100    pub fn is_empty(&self) -> bool {
101        self.text.is_empty()
102    }
103
104    #[inline(always)]
105    pub fn is_char_boundary(self, offset: usize) -> bool {
106        self.text.is_char_boundary(offset)
107    }
108
109    #[inline(always)]
110    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
111        if mid == MAX_BASE {
112            let left = self;
113            let right = ChunkSlice {
114                chars: 0,
115                chars_utf16: 0,
116                newlines: 0,
117                tabs: 0,
118                text: "",
119            };
120            (left, right)
121        } else {
122            let mask = (1u128 << mid) - 1;
123            let (left_text, right_text) = self.text.split_at(mid);
124            let left = ChunkSlice {
125                chars: self.chars & mask,
126                chars_utf16: self.chars_utf16 & mask,
127                newlines: self.newlines & mask,
128                tabs: self.tabs & mask,
129                text: left_text,
130            };
131            let right = ChunkSlice {
132                chars: self.chars >> mid,
133                chars_utf16: self.chars_utf16 >> mid,
134                newlines: self.newlines >> mid,
135                tabs: self.tabs >> mid,
136                text: right_text,
137            };
138            (left, right)
139        }
140    }
141
142    #[inline(always)]
143    pub fn slice(self, range: Range<usize>) -> Self {
144        let mask = if range.end == MAX_BASE {
145            u128::MAX
146        } else {
147            (1u128 << range.end) - 1
148        };
149        if range.start == MAX_BASE {
150            Self {
151                chars: 0,
152                chars_utf16: 0,
153                newlines: 0,
154                tabs: 0,
155                text: "",
156            }
157        } else {
158            Self {
159                chars: (self.chars & mask) >> range.start,
160                chars_utf16: (self.chars_utf16 & mask) >> range.start,
161                newlines: (self.newlines & mask) >> range.start,
162                tabs: (self.tabs & mask) >> range.start,
163                text: &self.text[range],
164            }
165        }
166    }
167
168    #[inline(always)]
169    pub fn text_summary(&self) -> TextSummary {
170        let mut chars = 0;
171        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
172        TextSummary {
173            len: self.len(),
174            chars,
175            len_utf16: self.len_utf16(),
176            lines: self.lines(),
177            first_line_chars: self.first_line_chars(),
178            last_line_chars: self.last_line_chars(),
179            last_line_len_utf16: self.last_line_len_utf16(),
180            longest_row,
181            longest_row_chars,
182        }
183    }
184
185    /// Get length in bytes
186    #[inline(always)]
187    pub fn len(&self) -> usize {
188        self.text.len()
189    }
190
191    /// Get length in UTF-16 code units
192    #[inline(always)]
193    pub fn len_utf16(&self) -> OffsetUtf16 {
194        OffsetUtf16(self.chars_utf16.count_ones() as usize)
195    }
196
197    /// Get point representing number of lines and length of last line
198    #[inline(always)]
199    pub fn lines(&self) -> Point {
200        let row = self.newlines.count_ones();
201        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
202        Point::new(row, column)
203    }
204
205    /// Get number of chars in first line
206    #[inline(always)]
207    pub fn first_line_chars(&self) -> u32 {
208        if self.newlines == 0 {
209            self.chars.count_ones()
210        } else {
211            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
212            (self.chars & mask).count_ones()
213        }
214    }
215
216    /// Get number of chars in last line
217    #[inline(always)]
218    pub fn last_line_chars(&self) -> u32 {
219        if self.newlines == 0 {
220            self.chars.count_ones()
221        } else {
222            let mask = !(u128::MAX >> self.newlines.leading_zeros());
223            (self.chars & mask).count_ones()
224        }
225    }
226
227    /// Get number of UTF-16 code units in last line
228    #[inline(always)]
229    pub fn last_line_len_utf16(&self) -> u32 {
230        if self.newlines == 0 {
231            self.chars_utf16.count_ones()
232        } else {
233            let mask = !(u128::MAX >> self.newlines.leading_zeros());
234            (self.chars_utf16 & mask).count_ones()
235        }
236    }
237
238    /// Get the longest row in the chunk and its length in characters.
239    /// Calculate the total number of characters in the chunk along the way.
240    #[inline(always)]
241    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
242        let mut chars = self.chars;
243        let mut newlines = self.newlines;
244        *total_chars = 0;
245        let mut row = 0;
246        let mut longest_row = 0;
247        let mut longest_row_chars = 0;
248        while newlines > 0 {
249            let newline_ix = newlines.trailing_zeros();
250            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
251            *total_chars += usize::from(row_chars);
252            if row_chars > longest_row_chars {
253                longest_row = row;
254                longest_row_chars = row_chars;
255            }
256
257            newlines >>= newline_ix;
258            newlines >>= 1;
259            chars >>= newline_ix;
260            chars >>= 1;
261            row += 1;
262            *total_chars += 1;
263        }
264
265        let row_chars = chars.count_ones() as u8;
266        *total_chars += usize::from(row_chars);
267        if row_chars > longest_row_chars {
268            (row, row_chars as u32)
269        } else {
270            (longest_row, longest_row_chars as u32)
271        }
272    }
273
274    #[inline(always)]
275    pub fn offset_to_point(&self, offset: usize) -> Point {
276        let mask = if offset == MAX_BASE {
277            u128::MAX
278        } else {
279            (1u128 << offset) - 1
280        };
281        let row = (self.newlines & mask).count_ones();
282        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
283        let column = (offset - newline_ix as usize) as u32;
284        Point::new(row, column)
285    }
286
287    #[inline(always)]
288    pub fn point_to_offset(&self, point: Point) -> usize {
289        if point.row > self.lines().row {
290            debug_panic!(
291                "point {:?} extends beyond rows for string {:?}",
292                point,
293                self.text
294            );
295            return self.len();
296        }
297
298        let row_offset_range = self.offset_range_for_row(point.row);
299        if point.column > row_offset_range.len() as u32 {
300            debug_panic!(
301                "point {:?} extends beyond row for string {:?}",
302                point,
303                self.text
304            );
305            row_offset_range.end
306        } else {
307            row_offset_range.start + point.column as usize
308        }
309    }
310
311    #[inline(always)]
312    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
313        let mask = if offset == MAX_BASE {
314            u128::MAX
315        } else {
316            (1u128 << offset) - 1
317        };
318        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
319    }
320
321    #[inline(always)]
322    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
323        if target.0 == 0 {
324            0
325        } else {
326            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
327            if ix == MAX_BASE {
328                MAX_BASE
329            } else {
330                let utf8_additional_len = cmp::min(
331                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
332                    self.text.len() - ix,
333                );
334                ix + utf8_additional_len
335            }
336        }
337    }
338
339    #[inline(always)]
340    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
341        let mask = if offset == MAX_BASE {
342            u128::MAX
343        } else {
344            (1u128 << offset) - 1
345        };
346        let row = (self.newlines & mask).count_ones();
347        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
348        let column = if newline_ix as usize == MAX_BASE {
349            0
350        } else {
351            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
352        };
353        PointUtf16::new(row, column)
354    }
355
356    #[inline(always)]
357    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
358        self.offset_to_point_utf16(self.point_to_offset(point))
359    }
360
361    #[inline(always)]
362    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
363        let lines = self.lines();
364        if point.row > lines.row {
365            if !clip {
366                debug_panic!(
367                    "point {:?} is beyond this chunk's extent {:?}",
368                    point,
369                    self.text
370                );
371            }
372            return self.len();
373        }
374
375        let row_offset_range = self.offset_range_for_row(point.row);
376        let line = self.slice(row_offset_range.clone());
377        if point.column > line.last_line_len_utf16() {
378            if !clip {
379                debug_panic!(
380                    "point {:?} is beyond the end of the line in chunk {:?}",
381                    point,
382                    self.text
383                );
384            }
385            return line.len();
386        }
387
388        let mut offset = row_offset_range.start;
389        if point.column > 0 {
390            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
391            if !self.text.is_char_boundary(offset) {
392                offset -= 1;
393                while !self.text.is_char_boundary(offset) {
394                    offset -= 1;
395                }
396                if !clip {
397                    debug_panic!(
398                        "point {:?} is within character in chunk {:?}",
399                        point,
400                        self.text,
401                    );
402                }
403            }
404        }
405        offset
406    }
407
408    #[inline(always)]
409    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
410        let max_point = self.lines();
411        if point.0.row > max_point.row {
412            return max_point;
413        }
414
415        let row_offset_range = self.offset_range_for_row(point.0.row);
416        let line = self.slice(row_offset_range);
417        if point.0.column == 0 {
418            Point::new(point.0.row, 0)
419        } else if point.0.column >= line.len_utf16().0 as u32 {
420            Point::new(point.0.row, line.len() as u32)
421        } else {
422            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
423            while !line.text.is_char_boundary(column) {
424                column -= 1;
425            }
426            Point::new(point.0.row, column as u32)
427        }
428    }
429
430    #[inline(always)]
431    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
432        let max_point = self.lines();
433        if point.row > max_point.row {
434            return max_point;
435        }
436
437        let line = self.slice(self.offset_range_for_row(point.row));
438        if point.column == 0 {
439            point
440        } else if point.column >= line.len() as u32 {
441            Point::new(point.row, line.len() as u32)
442        } else {
443            let mut column = point.column as usize;
444            let bytes = line.text.as_bytes();
445            if bytes[column - 1] < 128 && bytes[column] < 128 {
446                return Point::new(point.row, column as u32);
447            }
448
449            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
450            loop {
451                if line.is_char_boundary(column)
452                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
453                {
454                    break;
455                }
456
457                match bias {
458                    Bias::Left => column -= 1,
459                    Bias::Right => column += 1,
460                }
461                grapheme_cursor.set_cursor(column);
462            }
463            Point::new(point.row, column as u32)
464        }
465    }
466
467    #[inline(always)]
468    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
469        let max_point = self.lines();
470        if point.0.row > max_point.row {
471            PointUtf16::new(max_point.row, self.last_line_len_utf16())
472        } else {
473            let line = self.slice(self.offset_range_for_row(point.0.row));
474            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
475            PointUtf16::new(point.0.row, column.0 as u32)
476        }
477    }
478
479    #[inline(always)]
480    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
481        if target == OffsetUtf16::default() {
482            OffsetUtf16::default()
483        } else if target >= self.len_utf16() {
484            self.len_utf16()
485        } else {
486            let mut offset = self.offset_utf16_to_offset(target);
487            while !self.text.is_char_boundary(offset) {
488                if bias == Bias::Left {
489                    offset -= 1;
490                } else {
491                    offset += 1;
492                }
493            }
494            self.offset_to_offset_utf16(offset)
495        }
496    }
497
498    #[inline(always)]
499    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
500        let row_start = if row > 0 {
501            nth_set_bit(self.newlines, row as usize) + 1
502        } else {
503            0
504        };
505        let row_len = if row_start == MAX_BASE {
506            0
507        } else {
508            cmp::min(
509                (self.newlines >> row_start).trailing_zeros(),
510                (self.text.len() - row_start) as u32,
511            )
512        };
513        row_start..row_start + row_len as usize
514    }
515
516    #[inline(always)]
517    pub fn tabs(&self) -> Tabs {
518        Tabs {
519            tabs: self.tabs,
520            chars: self.chars,
521        }
522    }
523}
524
/// Iterator over the tabs described by a pair of chunk bitmaps, yielded in
/// increasing byte-offset order.
pub struct Tabs {
    /// One bit per tab that has not been yielded yet, indexed by byte offset.
    tabs: u128,
    /// One bit per char start, indexed by byte offset.
    chars: u128,
}

/// Location of a single tab, both in bytes and in chars.
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    pub byte_offset: usize,
    pub char_offset: usize,
}

impl Iterator for Tabs {
    type Item = TabPosition;

    fn next(&mut self) -> Option<Self::Item> {
        let pending = self.tabs;
        if pending == 0 {
            return None;
        }

        // Tabs occupy a single byte, so the bit index is also the byte offset.
        let byte_offset = pending.trailing_zeros() as usize;
        // The chars starting before the tab give its char offset.
        let chars_before = self.chars & ((1u128 << byte_offset) - 1);

        // Clear the lowest set bit, i.e. the tab being yielded.
        self.tabs = pending & (pending - 1);

        Some(TabPosition {
            byte_offset,
            char_offset: chars_before.count_ones() as usize,
        })
    }
}
559
560/// Finds the n-th bit that is set to 1.
561#[inline(always)]
562fn nth_set_bit(v: u128, n: usize) -> usize {
563    let low = v as u64;
564    let high = (v >> 64) as u64;
565
566    let low_count = low.count_ones() as usize;
567    if n > low_count {
568        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
569    } else {
570        nth_set_bit_u64(low, n as u64) as usize
571    }
572}
573
/// Returns the index of the `n`-th set bit of `v`, where `n` is 1-based and
/// bits are counted starting from the least significant one.
///
/// Works without branches: partial popcounts are computed for progressively
/// wider groups of the bit-reversed input, and the search position is then
/// narrowed by halving the window (32, 16, 8, 4, 2, 1 bits) at each step.
/// NOTE(review): callers appear to always pass `1 <= n <= v.count_ones()`;
/// behaviour outside that range is not exercised by the tests here.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    let reversed = v.reverse_bits();
    let mut pos: u64 = 64;

    // Popcounts of each 2-, 4-, 8- and 16-bit group respectively.
    let pairs = reversed - ((reversed >> 1) & (u64::MAX / 3));
    let nibbles = (pairs & (u64::MAX / 5)) + ((pairs >> 2) & (u64::MAX / 5));
    let bytes = (nibbles + (nibbles >> 4)) & (u64::MAX / 0x11);
    let halves = (bytes + (bytes >> 8)) & (u64::MAX / 0x101);

    // At every step `diff` has bit 8 set exactly when the current window holds
    // fewer than `n` set bits; the window is then skipped and its count is
    // subtracted from `n`.
    let count = (halves >> 32) + (halves >> 48);
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 3;
    n -= count & (diff >> 8);

    let count = (halves >> (pos - 16)) & 0xff;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 4;
    n -= count & (diff >> 8);

    let count = (bytes >> (pos - 8)) & 0xf;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 5;
    n -= count & (diff >> 8);

    let count = (nibbles >> (pos - 4)) & 0x7;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 6;
    n -= count & (diff >> 8);

    let count = (pairs >> (pos - 2)) & 0x3;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 7;
    n -= count & (diff >> 8);

    let count = (reversed >> (pos - 1)) & 0x1;
    pos -= (count.wrapping_sub(n) & 256) >> 8;

    // `pos` was tracked on the reversed value; convert it back to an index
    // counted from the least significant bit.
    64 - pos
}
611
612#[cfg(test)]
613mod tests {
614    use super::*;
615    use rand::prelude::*;
616    use util::RandomCharIter;
617
618    #[gpui::test(iterations = 100)]
619    fn test_random_chunks(mut rng: StdRng) {
620        let chunk_len = rng.random_range(0..=MAX_BASE);
621        let text = RandomCharIter::new(&mut rng)
622            .take(chunk_len)
623            .collect::<String>();
624        let mut ix = chunk_len;
625        while !text.is_char_boundary(ix) {
626            ix -= 1;
627        }
628        let text = &text[..ix];
629
630        log::info!("Chunk: {:?}", text);
631        let chunk = Chunk::new(text);
632        verify_chunk(chunk.as_slice(), text);
633
634        for _ in 0..10 {
635            let mut start = rng.random_range(0..=chunk.text.len());
636            let mut end = rng.random_range(start..=chunk.text.len());
637            while !chunk.text.is_char_boundary(start) {
638                start -= 1;
639            }
640            while !chunk.text.is_char_boundary(end) {
641                end -= 1;
642            }
643            let range = start..end;
644            log::info!("Range: {:?}", range);
645            let text_slice = &text[range.clone()];
646            let chunk_slice = chunk.slice(range);
647            verify_chunk(chunk_slice, text_slice);
648        }
649    }
650
651    #[gpui::test(iterations = 1000)]
652    fn test_nth_set_bit_random(mut rng: StdRng) {
653        let set_count = rng.random_range(0..=128);
654        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
655        set_bits.sort();
656        let mut n = 0;
657        for ix in set_bits.iter().copied() {
658            n |= 1 << ix;
659        }
660
661        for (mut ix, position) in set_bits.into_iter().enumerate() {
662            ix += 1;
663            assert_eq!(
664                nth_set_bit(n, ix),
665                position,
666                "nth_set_bit({:0128b}, {})",
667                n,
668                ix
669            );
670        }
671    }
672
    /// Checks every conversion, clipping and summary method of `chunk` against
    /// values computed directly from `text`, which must be the same text the
    /// chunk slice was built from.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Running expected positions, advanced one char at a time.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Conversions between coordinate spaces at the start of this char.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A point at the start of a char must be left untouched by clipping.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Points inside a multi-byte char clip to its start (left) or end (right).
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // The same for UTF-16 positions inside a char that spans two code units.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the expected positions past this char.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // Repeat the conversion and clipping checks at the very end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').next_back().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        // Reference scan: the strict `>` keeps the earliest row on ties.
        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // The final row has no terminating newline and is compared separately.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
950}