chunk.rs

  1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
  2use arrayvec::ArrayString;
  3use std::{cmp, ops::Range};
  4use sum_tree::Bias;
  5use unicode_segmentation::GraphemeCursor;
  6use util::debug_panic;
  7
/// Base chunk size in bytes: 6 when compiled under `cfg!(test)`, 64 otherwise.
/// NOTE(review): the small test value presumably lets tests reach chunk limits
/// with short inputs — confirm against callers.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Twice `MIN_BASE`; used as the byte capacity of `Chunk::text`.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
 10
/// A fixed-capacity piece of text together with per-byte bitmaps describing it.
///
/// Bit `i` of every bitmap refers to byte `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Set at the byte index where each character starts.
    chars: u128,
    /// Set at each character's first byte and, for characters that take two
    /// UTF-16 code units, also at the following byte, so the number of set
    /// bits equals the text's length in UTF-16 code units.
    chars_utf16: u128,
    /// Set at the byte index of every `'\n'`.
    newlines: u128,
    /// Set at the byte index of every `'\t'`.
    tabs: u128,
    /// The chunk's text, stored inline with a capacity of `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
 19
 20impl Chunk {
 21    #[inline(always)]
 22    pub fn new(text: &str) -> Self {
 23        let mut this = Chunk::default();
 24        this.push_str(text);
 25        this
 26    }
 27
 28    #[inline(always)]
 29    pub fn push_str(&mut self, text: &str) {
 30        for (char_ix, c) in text.char_indices() {
 31            let ix = self.text.len() + char_ix;
 32            self.chars |= 1 << ix;
 33            self.chars_utf16 |= 1 << ix;
 34            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
 35            self.newlines |= ((c == '\n') as u128) << ix;
 36            self.tabs |= ((c == '\t') as u128) << ix;
 37        }
 38        self.text.push_str(text);
 39    }
 40
 41    #[inline(always)]
 42    pub fn append(&mut self, slice: ChunkSlice) {
 43        if slice.is_empty() {
 44            return;
 45        };
 46
 47        let base_ix = self.text.len();
 48        self.chars |= slice.chars << base_ix;
 49        self.chars_utf16 |= slice.chars_utf16 << base_ix;
 50        self.newlines |= slice.newlines << base_ix;
 51        self.tabs |= slice.tabs << base_ix;
 52        self.text.push_str(&slice.text);
 53    }
 54
 55    #[inline(always)]
 56    pub fn as_slice(&self) -> ChunkSlice {
 57        ChunkSlice {
 58            chars: self.chars,
 59            chars_utf16: self.chars_utf16,
 60            newlines: self.newlines,
 61            tabs: self.tabs,
 62            text: &self.text,
 63        }
 64    }
 65
 66    #[inline(always)]
 67    pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
 68        self.as_slice().slice(range)
 69    }
 70}
 71
/// A borrowed view of a chunk's text (or a sub-range of it) together with the
/// matching bitmaps.
///
/// The bitmaps are re-based so that bit `i` refers to byte `i` of `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Set at the byte index where each character starts.
    chars: u128,
    /// One set bit per UTF-16 code unit of the text.
    chars_utf16: u128,
    /// Set at the byte index of every `'\n'`.
    newlines: u128,
    /// Set at the byte index of every `'\t'`.
    tabs: u128,
    /// The text this view covers.
    text: &'a str,
}
 80
 81impl<'a> Into<Chunk> for ChunkSlice<'a> {
 82    fn into(self) -> Chunk {
 83        Chunk {
 84            chars: self.chars,
 85            chars_utf16: self.chars_utf16,
 86            newlines: self.newlines,
 87            tabs: self.tabs,
 88            text: self.text.try_into().unwrap(),
 89        }
 90    }
 91}
 92
 93impl<'a> ChunkSlice<'a> {
 94    #[inline(always)]
 95    pub fn is_empty(self) -> bool {
 96        self.text.is_empty()
 97    }
 98
 99    #[inline(always)]
100    pub fn is_char_boundary(self, offset: usize) -> bool {
101        self.text.is_char_boundary(offset)
102    }
103
104    #[inline(always)]
105    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
106        if mid == MAX_BASE {
107            let left = self;
108            let right = ChunkSlice {
109                chars: 0,
110                chars_utf16: 0,
111                newlines: 0,
112                tabs: 0,
113                text: "",
114            };
115            (left, right)
116        } else {
117            let mask = (1u128 << mid) - 1;
118            let (left_text, right_text) = self.text.split_at(mid);
119            let left = ChunkSlice {
120                chars: self.chars & mask,
121                chars_utf16: self.chars_utf16 & mask,
122                newlines: self.newlines & mask,
123                tabs: self.tabs & mask,
124                text: left_text,
125            };
126            let right = ChunkSlice {
127                chars: self.chars >> mid,
128                chars_utf16: self.chars_utf16 >> mid,
129                newlines: self.newlines >> mid,
130                tabs: self.tabs >> mid,
131                text: right_text,
132            };
133            (left, right)
134        }
135    }
136
137    #[inline(always)]
138    pub fn slice(self, range: Range<usize>) -> Self {
139        let mask = if range.end == MAX_BASE {
140            u128::MAX
141        } else {
142            (1u128 << range.end) - 1
143        };
144        if range.start == MAX_BASE {
145            Self {
146                chars: 0,
147                chars_utf16: 0,
148                newlines: 0,
149                tabs: 0,
150                text: "",
151            }
152        } else {
153            Self {
154                chars: (self.chars & mask) >> range.start,
155                chars_utf16: (self.chars_utf16 & mask) >> range.start,
156                newlines: (self.newlines & mask) >> range.start,
157                tabs: (self.tabs & mask) >> range.start,
158                text: &self.text[range],
159            }
160        }
161    }
162
163    #[inline(always)]
164    pub fn text_summary(&self) -> TextSummary {
165        let (longest_row, longest_row_chars) = self.longest_row();
166        TextSummary {
167            len: self.len(),
168            len_utf16: self.len_utf16(),
169            lines: self.lines(),
170            first_line_chars: self.first_line_chars(),
171            last_line_chars: self.last_line_chars(),
172            last_line_len_utf16: self.last_line_len_utf16(),
173            longest_row,
174            longest_row_chars,
175        }
176    }
177
178    /// Get length in bytes
179    #[inline(always)]
180    pub fn len(&self) -> usize {
181        self.text.len()
182    }
183
184    /// Get length in UTF-16 code units
185    #[inline(always)]
186    pub fn len_utf16(&self) -> OffsetUtf16 {
187        OffsetUtf16(self.chars_utf16.count_ones() as usize)
188    }
189
190    /// Get point representing number of lines and length of last line
191    #[inline(always)]
192    pub fn lines(&self) -> Point {
193        let row = self.newlines.count_ones();
194        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
195        Point::new(row, column)
196    }
197
198    /// Get number of chars in first line
199    #[inline(always)]
200    pub fn first_line_chars(&self) -> u32 {
201        if self.newlines == 0 {
202            self.chars.count_ones()
203        } else {
204            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
205            (self.chars & mask).count_ones()
206        }
207    }
208
209    /// Get number of chars in last line
210    #[inline(always)]
211    pub fn last_line_chars(&self) -> u32 {
212        if self.newlines == 0 {
213            self.chars.count_ones()
214        } else {
215            let mask = !(u128::MAX >> self.newlines.leading_zeros());
216            (self.chars & mask).count_ones()
217        }
218    }
219
220    /// Get number of UTF-16 code units in last line
221    #[inline(always)]
222    pub fn last_line_len_utf16(&self) -> u32 {
223        if self.newlines == 0 {
224            self.chars_utf16.count_ones()
225        } else {
226            let mask = !(u128::MAX >> self.newlines.leading_zeros());
227            (self.chars_utf16 & mask).count_ones()
228        }
229    }
230
231    /// Get the longest row in the chunk and its length in characters.
232    #[inline(always)]
233    pub fn longest_row(&self) -> (u32, u32) {
234        let mut chars = self.chars;
235        let mut newlines = self.newlines;
236        let mut row = 0;
237        let mut longest_row = 0;
238        let mut longest_row_chars = 0;
239        while newlines > 0 {
240            let newline_ix = newlines.trailing_zeros();
241            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
242            if row_chars > longest_row_chars {
243                longest_row = row;
244                longest_row_chars = row_chars;
245            }
246
247            newlines >>= newline_ix;
248            newlines >>= 1;
249            chars >>= newline_ix;
250            chars >>= 1;
251            row += 1;
252        }
253
254        let row_chars = chars.count_ones() as u8;
255        if row_chars > longest_row_chars {
256            (row, row_chars as u32)
257        } else {
258            (longest_row, longest_row_chars as u32)
259        }
260    }
261
262    #[inline(always)]
263    pub fn offset_to_point(&self, offset: usize) -> Point {
264        let mask = if offset == MAX_BASE {
265            u128::MAX
266        } else {
267            (1u128 << offset) - 1
268        };
269        let row = (self.newlines & mask).count_ones();
270        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
271        let column = (offset - newline_ix as usize) as u32;
272        Point::new(row, column)
273    }
274
275    #[inline(always)]
276    pub fn point_to_offset(&self, point: Point) -> usize {
277        if point.row > self.lines().row {
278            debug_panic!(
279                "point {:?} extends beyond rows for string {:?}",
280                point,
281                self.text
282            );
283            return self.len();
284        }
285
286        let row_offset_range = self.offset_range_for_row(point.row);
287        if point.column > row_offset_range.len() as u32 {
288            debug_panic!(
289                "point {:?} extends beyond row for string {:?}",
290                point,
291                self.text
292            );
293            row_offset_range.end
294        } else {
295            row_offset_range.start + point.column as usize
296        }
297    }
298
299    #[inline(always)]
300    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
301        let mask = if offset == MAX_BASE {
302            u128::MAX
303        } else {
304            (1u128 << offset) - 1
305        };
306        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
307    }
308
309    #[inline(always)]
310    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
311        if target.0 == 0 {
312            0
313        } else {
314            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
315            if ix == MAX_BASE {
316                MAX_BASE
317            } else {
318                let utf8_additional_len = cmp::min(
319                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
320                    self.text.len() - ix,
321                );
322                ix + utf8_additional_len
323            }
324        }
325    }
326
327    #[inline(always)]
328    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
329        let mask = if offset == MAX_BASE {
330            u128::MAX
331        } else {
332            (1u128 << offset) - 1
333        };
334        let row = (self.newlines & mask).count_ones();
335        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
336        let column = if newline_ix as usize == MAX_BASE {
337            0
338        } else {
339            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
340        };
341        PointUtf16::new(row, column)
342    }
343
344    #[inline(always)]
345    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
346        self.offset_to_point_utf16(self.point_to_offset(point))
347    }
348
349    #[inline(always)]
350    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
351        let lines = self.lines();
352        if point.row > lines.row {
353            if !clip {
354                debug_panic!(
355                    "point {:?} is beyond this chunk's extent {:?}",
356                    point,
357                    self.text
358                );
359            }
360            return self.len();
361        }
362
363        let row_offset_range = self.offset_range_for_row(point.row);
364        let line = self.slice(row_offset_range.clone());
365        if point.column > line.last_line_len_utf16() {
366            if !clip {
367                debug_panic!(
368                    "point {:?} is beyond the end of the line in chunk {:?}",
369                    point,
370                    self.text
371                );
372            }
373            return line.len();
374        }
375
376        let mut offset = row_offset_range.start;
377        if point.column > 0 {
378            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
379            if !self.text.is_char_boundary(offset) {
380                offset -= 1;
381                while !self.text.is_char_boundary(offset) {
382                    offset -= 1;
383                }
384                if !clip {
385                    debug_panic!(
386                        "point {:?} is within character in chunk {:?}",
387                        point,
388                        self.text,
389                    );
390                }
391            }
392        }
393        offset
394    }
395
396    #[inline(always)]
397    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
398        let max_point = self.lines();
399        if point.0.row > max_point.row {
400            return max_point;
401        }
402
403        let row_offset_range = self.offset_range_for_row(point.0.row);
404        let line = self.slice(row_offset_range.clone());
405        if point.0.column == 0 {
406            Point::new(point.0.row, 0)
407        } else if point.0.column >= line.len_utf16().0 as u32 {
408            Point::new(point.0.row, line.len() as u32)
409        } else {
410            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
411            while !line.text.is_char_boundary(column) {
412                column -= 1;
413            }
414            Point::new(point.0.row, column as u32)
415        }
416    }
417
418    #[inline(always)]
419    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
420        let max_point = self.lines();
421        if point.row > max_point.row {
422            return max_point;
423        }
424
425        let line = self.slice(self.offset_range_for_row(point.row));
426        if point.column == 0 {
427            point
428        } else if point.column >= line.len() as u32 {
429            Point::new(point.row, line.len() as u32)
430        } else {
431            let mut column = point.column as usize;
432            let bytes = line.text.as_bytes();
433            if bytes[column - 1] < 128 && bytes[column] < 128 {
434                return Point::new(point.row, column as u32);
435            }
436
437            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
438            loop {
439                if line.is_char_boundary(column)
440                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
441                {
442                    break;
443                }
444
445                match bias {
446                    Bias::Left => column -= 1,
447                    Bias::Right => column += 1,
448                }
449                grapheme_cursor.set_cursor(column);
450            }
451            Point::new(point.row, column as u32)
452        }
453    }
454
455    #[inline(always)]
456    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
457        let max_point = self.lines();
458        if point.0.row > max_point.row {
459            PointUtf16::new(max_point.row, self.last_line_len_utf16())
460        } else {
461            let line = self.slice(self.offset_range_for_row(point.0.row));
462            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
463            PointUtf16::new(point.0.row, column.0 as u32)
464        }
465    }
466
467    #[inline(always)]
468    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
469        if target == OffsetUtf16::default() {
470            OffsetUtf16::default()
471        } else if target >= self.len_utf16() {
472            self.len_utf16()
473        } else {
474            let mut offset = self.offset_utf16_to_offset(target);
475            while !self.text.is_char_boundary(offset) {
476                if bias == Bias::Left {
477                    offset -= 1;
478                } else {
479                    offset += 1;
480                }
481            }
482            self.offset_to_offset_utf16(offset)
483        }
484    }
485
486    #[inline(always)]
487    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
488        let row_start = if row > 0 {
489            nth_set_bit(self.newlines, row as usize) + 1
490        } else {
491            0
492        };
493        let row_len = if row_start == MAX_BASE {
494            0
495        } else {
496            cmp::min(
497                (self.newlines >> row_start).trailing_zeros(),
498                (self.text.len() - row_start) as u32,
499            )
500        };
501        row_start..row_start + row_len as usize
502    }
503
504    #[inline(always)]
505    pub fn tabs(&self) -> Tabs {
506        Tabs {
507            tabs: self.tabs,
508            chars: self.chars,
509        }
510    }
511}
512
/// Iterator over the tab characters of a `ChunkSlice`, yielding a
/// `TabPosition` for each one in ascending byte order.
pub struct Tabs {
    /// Bitmap of the tabs that have not been yielded yet (one bit per byte).
    tabs: u128,
    /// Bitmap of character starts, used to turn byte offsets into char offsets.
    chars: u128,
}
517
/// Location of a single tab character within a slice.
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Byte index of the tab.
    pub byte_offset: usize,
    /// Number of characters that precede the tab.
    pub char_offset: usize,
}
523
524impl Iterator for Tabs {
525    type Item = TabPosition;
526
527    fn next(&mut self) -> Option<Self::Item> {
528        if self.tabs == 0 {
529            return None;
530        }
531
532        let tab_offset = self.tabs.trailing_zeros() as usize;
533        let chars_mask = (1 << tab_offset) - 1;
534        let char_offset = (self.chars & chars_mask).count_ones() as usize;
535
536        // Since tabs are 1 byte the tab offset is the same as the byte offset
537        let position = TabPosition {
538            byte_offset: tab_offset,
539            char_offset: char_offset,
540        };
541        // Remove the tab we've just seen
542        self.tabs ^= 1 << tab_offset;
543
544        Some(position)
545    }
546}
547
548/// Finds the n-th bit that is set to 1.
549#[inline(always)]
550fn nth_set_bit(v: u128, n: usize) -> usize {
551    let low = v as u64;
552    let high = (v >> 64) as u64;
553
554    let low_count = low.count_ones() as usize;
555    if n > low_count {
556        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
557    } else {
558        nth_set_bit_u64(low, n as u64) as usize
559    }
560}
561
/// Returns the zero-based position (counted from the least-significant bit)
/// of the `n`-th set bit of `v`, where `n` is one-based.
///
/// The input is bit-reversed and then searched from the top with a branchless
/// rank/select: partial population counts are built for 2-, 4-, 8- and 16-bit
/// fields, and the search narrows to half of the current window at each step.
/// NOTE(review): the result for `n == 0` or `n` greater than the number of set
/// bits is not meaningful; callers are presumably expected to avoid it.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // Field masks for the parallel population count.
    const PAIR_MASK: u64 = u64::MAX / 3;
    const NIBBLE_MASK: u64 = u64::MAX / 5;
    const BYTE_MASK: u64 = u64::MAX / 0x11;
    const HALF_WORD_MASK: u64 = u64::MAX / 0x101;

    let bits = v.reverse_bits();
    let mut pos: u64 = 64;

    // Set-bit counts per 2-, 4-, 8- and 16-bit field.
    let per_2 = bits - ((bits >> 1) & PAIR_MASK);
    let per_4 = (per_2 & NIBBLE_MASK) + ((per_2 >> 2) & NIBBLE_MASK);
    let per_8 = (per_4 + (per_4 >> 4)) & BYTE_MASK;
    let per_16 = (per_8 + (per_8 >> 8)) & HALF_WORD_MASK;

    // Each step compares the count in the upper half of the current window
    // with the remaining rank. Bit 8 of the wrapped difference is set exactly
    // when the rank exceeds the count; in that case the window moves down and
    // the count is subtracted from the rank.
    let count = (per_16 >> 32) + (per_16 >> 48);
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 3;
    n -= count & (diff >> 8);

    let count = (per_16 >> (pos - 16)) & 0xff;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 4;
    n -= count & (diff >> 8);

    let count = (per_8 >> (pos - 8)) & 0xf;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 5;
    n -= count & (diff >> 8);

    let count = (per_4 >> (pos - 4)) & 0x7;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 6;
    n -= count & (diff >> 8);

    let count = (per_2 >> (pos - 2)) & 0x3;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 7;
    n -= count & (diff >> 8);

    let count = (bits >> (pos - 1)) & 0x1;
    let diff = count.wrapping_sub(n);
    pos -= (diff & 256) >> 8;

    // `pos` is a one-based position from the top of the reversed word, which
    // maps to a zero-based position from the bottom of the original word.
    65 - pos - 1
}
599
600#[cfg(test)]
601mod tests {
602    use super::*;
603    use rand::prelude::*;
604    use util::RandomCharIter;
605
606    #[gpui::test(iterations = 100)]
607    fn test_random_chunks(mut rng: StdRng) {
608        let chunk_len = rng.gen_range(0..=MAX_BASE);
609        let text = RandomCharIter::new(&mut rng)
610            .take(chunk_len)
611            .collect::<String>();
612        let mut ix = chunk_len;
613        while !text.is_char_boundary(ix) {
614            ix -= 1;
615        }
616        let text = &text[..ix];
617
618        log::info!("Chunk: {:?}", text);
619        let chunk = Chunk::new(&text);
620        verify_chunk(chunk.as_slice(), text);
621
622        for _ in 0..10 {
623            let mut start = rng.gen_range(0..=chunk.text.len());
624            let mut end = rng.gen_range(start..=chunk.text.len());
625            while !chunk.text.is_char_boundary(start) {
626                start -= 1;
627            }
628            while !chunk.text.is_char_boundary(end) {
629                end -= 1;
630            }
631            let range = start..end;
632            log::info!("Range: {:?}", range);
633            let text_slice = &text[range.clone()];
634            let chunk_slice = chunk.slice(range);
635            verify_chunk(chunk_slice, text_slice);
636        }
637    }
638
639    #[gpui::test(iterations = 1000)]
640    fn test_nth_set_bit_random(mut rng: StdRng) {
641        let set_count = rng.gen_range(0..=128);
642        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
643        set_bits.sort();
644        let mut n = 0;
645        for ix in set_bits.iter().copied() {
646            n |= 1 << ix;
647        }
648
649        for (mut ix, position) in set_bits.into_iter().enumerate() {
650            ix += 1;
651            assert_eq!(
652                nth_set_bit(n, ix),
653                position,
654                "nth_set_bit({:0128b}, {})",
655                n,
656                ix
657            );
658        }
659    }
660
    /// Checks every conversion and measurement of `chunk` against values
    /// derived directly from `text`, which must be the text the chunk covers.
    ///
    /// The loop walks `text` one character at a time while tracking the
    /// expected byte offset, UTF-16 offset, point and UTF-16 point, and
    /// asserts that the chunk agrees at each character start and at positions
    /// inside multi-byte / multi-unit characters. The same checks are repeated
    /// for the end-of-text position, followed by length, line, longest-row
    /// and tab-position checks.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Round-trip checks at the start of the current character.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A position at a character start must be left alone by clipping.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Byte positions inside a multi-byte character clip to its start
            // (left bias) or its end (right bias).
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // UTF-16 positions inside a character that takes two code units.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the expected positions past the current character.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // Repeat the round-trip and clipping checks at the end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // The final row has no terminating newline, so account for it here.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
938}