chunk.rs

  1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
  2use arrayvec::ArrayString;
  3use std::{cmp, ops::Range};
  4use sum_tree::Bias;
  5use unicode_segmentation::GraphemeCursor;
  6use util::debug_panic;
  7
/// Smaller of the two chunk-size constants: 6 under `cfg!(test)`, 64 otherwise.
/// NOTE(review): not referenced elsewhere in this file; presumably the minimum
/// chunk size used by the enclosing rope — confirm against callers.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Capacity in bytes of a [`Chunk`]'s text buffer (twice [`MIN_BASE`]).
/// The bitmaps in this file keep one bit per byte in a `u128`, so this must not exceed 128.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
 10
/// A fixed-capacity piece of UTF-8 text plus per-byte bitmaps describing it.
///
/// Bit `i` of every bitmap refers to byte `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Bit set at the byte offset where each `char` starts.
    chars: u128,
    /// One bit per UTF-16 code unit: the starting byte of every `char`, plus the
    /// following byte for chars that need two UTF-16 code units.
    chars_utf16: u128,
    /// Bit set at the byte offset of each `'\n'`.
    newlines: u128,
    /// The text itself, holding at most `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
 18
 19impl Chunk {
 20    #[inline(always)]
 21    pub fn new(text: &str) -> Self {
 22        let mut this = Chunk::default();
 23        this.push_str(text);
 24        this
 25    }
 26
 27    #[inline(always)]
 28    pub fn push_str(&mut self, text: &str) {
 29        for (char_ix, c) in text.char_indices() {
 30            let ix = self.text.len() + char_ix;
 31            self.chars |= 1 << ix;
 32            self.chars_utf16 |= 1 << ix;
 33            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
 34            self.newlines |= ((c == '\n') as u128) << ix;
 35        }
 36        self.text.push_str(text);
 37    }
 38
 39    #[inline(always)]
 40    pub fn append(&mut self, slice: ChunkSlice) {
 41        if slice.is_empty() {
 42            return;
 43        };
 44
 45        let base_ix = self.text.len();
 46        self.chars |= slice.chars << base_ix;
 47        self.chars_utf16 |= slice.chars_utf16 << base_ix;
 48        self.newlines |= slice.newlines << base_ix;
 49        self.text.push_str(&slice.text);
 50    }
 51
 52    #[inline(always)]
 53    pub fn as_slice(&self) -> ChunkSlice {
 54        ChunkSlice {
 55            chars: self.chars,
 56            chars_utf16: self.chars_utf16,
 57            newlines: self.newlines,
 58            text: &self.text,
 59        }
 60    }
 61
 62    #[inline(always)]
 63    pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
 64        self.as_slice().slice(range)
 65    }
 66}
 67
/// A borrowed view of part (or all) of a chunk's text, with matching bitmaps.
///
/// The bitmaps are relative to the slice: bit `i` refers to byte `i` of `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Bit set at the byte offset where each `char` starts.
    chars: u128,
    /// One bit set per UTF-16 code unit (its population count is the UTF-16 length).
    chars_utf16: u128,
    /// Bit set at the byte offset of each `'\n'`.
    newlines: u128,
    /// The borrowed UTF-8 text covered by this slice.
    text: &'a str,
}
 75
 76impl<'a> Into<Chunk> for ChunkSlice<'a> {
 77    fn into(self) -> Chunk {
 78        Chunk {
 79            chars: self.chars,
 80            chars_utf16: self.chars_utf16,
 81            newlines: self.newlines,
 82            text: self.text.try_into().unwrap(),
 83        }
 84    }
 85}
 86
 87impl<'a> ChunkSlice<'a> {
 88    #[inline(always)]
 89    pub fn is_empty(self) -> bool {
 90        self.text.is_empty()
 91    }
 92
 93    #[inline(always)]
 94    pub fn is_char_boundary(self, offset: usize) -> bool {
 95        self.text.is_char_boundary(offset)
 96    }
 97
 98    #[inline(always)]
 99    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
100        if mid == MAX_BASE {
101            let left = self;
102            let right = ChunkSlice {
103                chars: 0,
104                chars_utf16: 0,
105                newlines: 0,
106                text: "",
107            };
108            (left, right)
109        } else {
110            let mask = if mid == MAX_BASE {
111                u128::MAX
112            } else {
113                (1u128 << mid) - 1
114            };
115            let (left_text, right_text) = self.text.split_at(mid);
116            let left = ChunkSlice {
117                chars: self.chars & mask,
118                chars_utf16: self.chars_utf16 & mask,
119                newlines: self.newlines & mask,
120                text: left_text,
121            };
122            let right = ChunkSlice {
123                chars: self.chars >> mid,
124                chars_utf16: self.chars_utf16 >> mid,
125                newlines: self.newlines >> mid,
126                text: right_text,
127            };
128            (left, right)
129        }
130    }
131
132    #[inline(always)]
133    pub fn slice(self, range: Range<usize>) -> Self {
134        let mask = if range.end == MAX_BASE {
135            u128::MAX
136        } else {
137            (1u128 << range.end) - 1
138        };
139        if range.start == MAX_BASE {
140            Self {
141                chars: 0,
142                chars_utf16: 0,
143                newlines: 0,
144                text: "",
145            }
146        } else {
147            Self {
148                chars: (self.chars & mask) >> range.start,
149                chars_utf16: (self.chars_utf16 & mask) >> range.start,
150                newlines: (self.newlines & mask) >> range.start,
151                text: &self.text[range],
152            }
153        }
154    }
155
156    #[inline(always)]
157    pub fn text_summary(&self) -> TextSummary {
158        let (longest_row, longest_row_chars) = self.longest_row();
159        TextSummary {
160            len: self.len(),
161            len_utf16: self.len_utf16(),
162            lines: self.lines(),
163            first_line_chars: self.first_line_chars(),
164            last_line_chars: self.last_line_chars(),
165            last_line_len_utf16: self.last_line_len_utf16(),
166            longest_row,
167            longest_row_chars,
168        }
169    }
170
171    /// Get length in bytes
172    #[inline(always)]
173    pub fn len(&self) -> usize {
174        self.text.len()
175    }
176
177    /// Get length in UTF-16 code units
178    #[inline(always)]
179    pub fn len_utf16(&self) -> OffsetUtf16 {
180        OffsetUtf16(self.chars_utf16.count_ones() as usize)
181    }
182
183    /// Get point representing number of lines and length of last line
184    #[inline(always)]
185    pub fn lines(&self) -> Point {
186        let row = self.newlines.count_ones();
187        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
188        Point::new(row, column)
189    }
190
191    /// Get number of chars in first line
192    #[inline(always)]
193    pub fn first_line_chars(&self) -> u32 {
194        if self.newlines == 0 {
195            self.chars.count_ones()
196        } else {
197            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
198            (self.chars & mask).count_ones()
199        }
200    }
201
202    /// Get number of chars in last line
203    #[inline(always)]
204    pub fn last_line_chars(&self) -> u32 {
205        if self.newlines == 0 {
206            self.chars.count_ones()
207        } else {
208            let mask = !(u128::MAX >> self.newlines.leading_zeros());
209            (self.chars & mask).count_ones()
210        }
211    }
212
213    /// Get number of UTF-16 code units in last line
214    #[inline(always)]
215    pub fn last_line_len_utf16(&self) -> u32 {
216        if self.newlines == 0 {
217            self.chars_utf16.count_ones()
218        } else {
219            let mask = !(u128::MAX >> self.newlines.leading_zeros());
220            (self.chars_utf16 & mask).count_ones()
221        }
222    }
223
224    /// Get the longest row in the chunk and its length in characters.
225    #[inline(always)]
226    pub fn longest_row(&self) -> (u32, u32) {
227        let mut chars = self.chars;
228        let mut newlines = self.newlines;
229        let mut row = 0;
230        let mut longest_row = 0;
231        let mut longest_row_chars = 0;
232        while newlines > 0 {
233            let newline_ix = newlines.trailing_zeros();
234            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
235            if row_chars > longest_row_chars {
236                longest_row = row;
237                longest_row_chars = row_chars;
238            }
239
240            newlines >>= newline_ix;
241            newlines >>= 1;
242            chars >>= newline_ix;
243            chars >>= 1;
244            row += 1;
245        }
246
247        let row_chars = chars.count_ones() as u8;
248        if row_chars > longest_row_chars {
249            (row, row_chars as u32)
250        } else {
251            (longest_row, longest_row_chars as u32)
252        }
253    }
254
255    #[inline(always)]
256    pub fn offset_to_point(&self, offset: usize) -> Point {
257        let mask = if offset == MAX_BASE {
258            u128::MAX
259        } else {
260            (1u128 << offset) - 1
261        };
262        let row = (self.newlines & mask).count_ones();
263        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
264        let column = (offset - newline_ix as usize) as u32;
265        Point::new(row, column)
266    }
267
268    #[inline(always)]
269    pub fn point_to_offset(&self, point: Point) -> usize {
270        if point.row > self.lines().row {
271            debug_panic!(
272                "point {:?} extends beyond rows for string {:?}",
273                point,
274                self.text
275            );
276            return self.len();
277        }
278
279        let row_offset_range = self.offset_range_for_row(point.row);
280        if point.column > row_offset_range.len() as u32 {
281            debug_panic!(
282                "point {:?} extends beyond row for string {:?}",
283                point,
284                self.text
285            );
286            row_offset_range.end
287        } else {
288            row_offset_range.start + point.column as usize
289        }
290    }
291
292    #[inline(always)]
293    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
294        let mask = if offset == MAX_BASE {
295            u128::MAX
296        } else {
297            (1u128 << offset) - 1
298        };
299        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
300    }
301
302    #[inline(always)]
303    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
304        if target.0 == 0 {
305            0
306        } else {
307            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
308            if ix == MAX_BASE {
309                MAX_BASE
310            } else {
311                let utf8_additional_len = cmp::min(
312                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
313                    self.text.len() - ix,
314                );
315                ix + utf8_additional_len
316            }
317        }
318    }
319
320    #[inline(always)]
321    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
322        let mask = if offset == MAX_BASE {
323            u128::MAX
324        } else {
325            (1u128 << offset) - 1
326        };
327        let row = (self.newlines & mask).count_ones();
328        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
329        let column = if newline_ix as usize == MAX_BASE {
330            0
331        } else {
332            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
333        };
334        PointUtf16::new(row, column)
335    }
336
337    #[inline(always)]
338    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
339        self.offset_to_point_utf16(self.point_to_offset(point))
340    }
341
342    #[inline(always)]
343    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
344        let lines = self.lines();
345        if point.row > lines.row {
346            if !clip {
347                debug_panic!(
348                    "point {:?} is beyond this chunk's extent {:?}",
349                    point,
350                    self.text
351                );
352            }
353            return self.len();
354        }
355
356        let row_offset_range = self.offset_range_for_row(point.row);
357        let line = self.slice(row_offset_range.clone());
358        if point.column > line.last_line_len_utf16() {
359            if !clip {
360                debug_panic!(
361                    "point {:?} is beyond the end of the line in chunk {:?}",
362                    point,
363                    self.text
364                );
365            }
366            return line.len();
367        }
368
369        let mut offset = row_offset_range.start;
370        if point.column > 0 {
371            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
372            if !self.text.is_char_boundary(offset) {
373                offset -= 1;
374                while !self.text.is_char_boundary(offset) {
375                    offset -= 1;
376                }
377                if !clip {
378                    debug_panic!(
379                        "point {:?} is within character in chunk {:?}",
380                        point,
381                        self.text,
382                    );
383                }
384            }
385        }
386        offset
387    }
388
389    #[inline(always)]
390    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
391        let max_point = self.lines();
392        if point.0.row > max_point.row {
393            return max_point;
394        }
395
396        let row_offset_range = self.offset_range_for_row(point.0.row);
397        let line = self.slice(row_offset_range.clone());
398        if point.0.column == 0 {
399            Point::new(point.0.row, 0)
400        } else if point.0.column >= line.len_utf16().0 as u32 {
401            Point::new(point.0.row, line.len() as u32)
402        } else {
403            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
404            while !line.text.is_char_boundary(column) {
405                column -= 1;
406            }
407            Point::new(point.0.row, column as u32)
408        }
409    }
410
411    #[inline(always)]
412    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
413        let max_point = self.lines();
414        if point.row > max_point.row {
415            return max_point;
416        }
417
418        let line = self.slice(self.offset_range_for_row(point.row));
419        if point.column == 0 {
420            point
421        } else if point.column >= line.len() as u32 {
422            Point::new(point.row, line.len() as u32)
423        } else {
424            let mut column = point.column as usize;
425            let bytes = line.text.as_bytes();
426            if bytes[column - 1] < 128 && bytes[column] < 128 {
427                return Point::new(point.row, column as u32);
428            }
429
430            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
431            loop {
432                if line.is_char_boundary(column)
433                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
434                {
435                    break;
436                }
437
438                match bias {
439                    Bias::Left => column -= 1,
440                    Bias::Right => column += 1,
441                }
442                grapheme_cursor.set_cursor(column);
443            }
444            Point::new(point.row, column as u32)
445        }
446    }
447
448    #[inline(always)]
449    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
450        let max_point = self.lines();
451        if point.0.row > max_point.row {
452            PointUtf16::new(max_point.row, self.last_line_len_utf16())
453        } else {
454            let line = self.slice(self.offset_range_for_row(point.0.row));
455            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
456            PointUtf16::new(point.0.row, column.0 as u32)
457        }
458    }
459
460    #[inline(always)]
461    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
462        if target == OffsetUtf16::default() {
463            OffsetUtf16::default()
464        } else if target >= self.len_utf16() {
465            self.len_utf16()
466        } else {
467            let mut offset = self.offset_utf16_to_offset(target);
468            while !self.text.is_char_boundary(offset) {
469                if bias == Bias::Left {
470                    offset -= 1;
471                } else {
472                    offset += 1;
473                }
474            }
475            self.offset_to_offset_utf16(offset)
476        }
477    }
478
479    #[inline(always)]
480    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
481        let row_start = if row > 0 {
482            nth_set_bit(self.newlines, row as usize) + 1
483        } else {
484            0
485        };
486        let row_len = if row_start == MAX_BASE {
487            0
488        } else {
489            cmp::min(
490                (self.newlines >> row_start).trailing_zeros(),
491                (self.text.len() - row_start) as u32,
492            )
493        };
494        row_start..row_start + row_len as usize
495    }
496}
497
498/// Finds the n-th bit that is set to 1.
499#[inline(always)]
500fn nth_set_bit(v: u128, n: usize) -> usize {
501    let low = v as u64;
502    let high = (v >> 64) as u64;
503
504    let low_count = low.count_ones() as usize;
505    if n > low_count {
506        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
507    } else {
508        nth_set_bit_u64(low, n as u64) as usize
509    }
510}
511
/// Returns the zero-based position (counting from the least significant bit) of the
/// `n`-th set bit of `v`, with `n` one-based.
///
/// Implemented without branches: the value is bit-reversed, partial population counts
/// are computed at several widths, and the search window is then narrowed step by step.
/// NOTE(review): the result for `n == 0` or `n > v.count_ones()` is not meaningful as
/// far as this code shows — callers are expected to avoid those inputs; confirm.
/// NOTE(review): this has the shape of the well-known "select the bit with a given
/// rank" bit trick; verify against a reference before modifying any step.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // Reverse so that ranking from the top of the reversed value corresponds to
    // ranking from the least significant bit of the original.
    let v = v.reverse_bits();
    // `s` is narrowed below and finally converted into the bit position.
    let mut s: u64 = 64;

    // Parallel bit count intermediates
    // `a`: counts per 2-bit group, `b`: per 4-bit group, `c`: per byte, `d`: per 16 bits.
    let a = v - ((v >> 1) & (u64::MAX / 3));
    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
    let d = (c + (c >> 8)) & (u64::MAX / 0x101);

    // Branchless select
    // Each step compares the count `t` of the current upper part with `n`; bit 8 of
    // `t - n` acts as the "t < n" flag that decides whether to move `s` and reduce `n`.
    let t = (d >> 32) + (d >> 48);
    s -= (t.wrapping_sub(n) & 256) >> 3;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (d >> (s - 16)) & 0xff;
    s -= (t.wrapping_sub(n) & 256) >> 4;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (c >> (s - 8)) & 0xf;
    s -= (t.wrapping_sub(n) & 256) >> 5;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (b >> (s - 4)) & 0x7;
    s -= (t.wrapping_sub(n) & 256) >> 6;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (a >> (s - 2)) & 0x3;
    s -= (t.wrapping_sub(n) & 256) >> 7;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // Convert `s` into a zero-based index (equivalent to `64 - s`).
    65 - s - 1
}
549
550#[cfg(test)]
551mod tests {
552    use super::*;
553    use rand::prelude::*;
554    use util::RandomCharIter;
555
556    #[gpui::test(iterations = 100)]
557    fn test_random_chunks(mut rng: StdRng) {
558        let chunk_len = rng.gen_range(0..=MAX_BASE);
559        let text = RandomCharIter::new(&mut rng)
560            .take(chunk_len)
561            .collect::<String>();
562        let mut ix = chunk_len;
563        while !text.is_char_boundary(ix) {
564            ix -= 1;
565        }
566        let text = &text[..ix];
567
568        log::info!("Chunk: {:?}", text);
569        let chunk = Chunk::new(&text);
570        verify_chunk(chunk.as_slice(), text);
571
572        for _ in 0..10 {
573            let mut start = rng.gen_range(0..=chunk.text.len());
574            let mut end = rng.gen_range(start..=chunk.text.len());
575            while !chunk.text.is_char_boundary(start) {
576                start -= 1;
577            }
578            while !chunk.text.is_char_boundary(end) {
579                end -= 1;
580            }
581            let range = start..end;
582            log::info!("Range: {:?}", range);
583            let text_slice = &text[range.clone()];
584            let chunk_slice = chunk.slice(range);
585            verify_chunk(chunk_slice, text_slice);
586        }
587    }
588
589    #[gpui::test(iterations = 1000)]
590    fn test_nth_set_bit_random(mut rng: StdRng) {
591        let set_count = rng.gen_range(0..=128);
592        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
593        set_bits.sort();
594        let mut n = 0;
595        for ix in set_bits.iter().copied() {
596            n |= 1 << ix;
597        }
598
599        for (mut ix, position) in set_bits.into_iter().enumerate() {
600            ix += 1;
601            assert_eq!(
602                nth_set_bit(n, ix),
603                position,
604                "nth_set_bit({:0128b}, {})",
605                n,
606                ix
607            );
608        }
609    }
610
    /// Checks every conversion, clipping and summary method of `chunk` against values
    /// computed directly from `text` by walking its characters.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Running position of the current character in each coordinate system.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        for c in text.chars() {
            // Round-trip conversions at the start of each character.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A point at a character start is already valid, so clipping is a no-op.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Points inside a multi-byte character clip to its start or its end.
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // Same for UTF-16 positions between the code units of one character.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the expected positions past this character.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same checks once more at the very end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        // The reference scan uses a strict `>`, so the earliest longest row wins ties.
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // Account for the final row, which has no terminating newline.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
    }
878}