chunk.rs

  1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
  2use arrayvec::ArrayString;
  3use std::{cmp, ops::Range};
  4use sum_tree::Bias;
  5use unicode_segmentation::GraphemeCursor;
  6use util::debug_panic;
  7
/// Base chunk size in bytes: 6 in test builds, 64 otherwise.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Capacity in bytes of a [`Chunk`]'s text buffer: twice [`MIN_BASE`], i.e. 128 outside of
/// tests, which matches the number of bits in the `u128` bitmaps kept next to the text.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
 10
/// A fixed-capacity piece of text together with bitmaps that index it.
///
/// Bit `i` of every bitmap describes byte `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Set at the byte index where each `char` starts.
    chars: u128,
    /// Set at the byte index where each `char` starts, and additionally at the following byte
    /// for chars that take two UTF-16 code units, so that the number of set bits equals the
    /// text's length in UTF-16 code units.
    chars_utf16: u128,
    /// Set at the byte index of each `'\n'`.
    newlines: u128,
    /// Set at the byte index of each `'\t'`.
    tabs: u128,
    /// The chunk's text, stored inline with a capacity of `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
 19
 20impl Chunk {
 21    #[inline(always)]
 22    pub fn new(text: &str) -> Self {
 23        let mut this = Chunk::default();
 24        this.push_str(text);
 25        this
 26    }
 27
 28    #[inline(always)]
 29    pub fn push_str(&mut self, text: &str) {
 30        for (char_ix, c) in text.char_indices() {
 31            let ix = self.text.len() + char_ix;
 32            self.chars |= 1 << ix;
 33            self.chars_utf16 |= 1 << ix;
 34            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
 35            self.newlines |= ((c == '\n') as u128) << ix;
 36            self.tabs |= ((c == '\t') as u128) << ix;
 37        }
 38        self.text.push_str(text);
 39    }
 40
 41    #[inline(always)]
 42    pub fn append(&mut self, slice: ChunkSlice) {
 43        if slice.is_empty() {
 44            return;
 45        };
 46
 47        let base_ix = self.text.len();
 48        self.chars |= slice.chars << base_ix;
 49        self.chars_utf16 |= slice.chars_utf16 << base_ix;
 50        self.newlines |= slice.newlines << base_ix;
 51        self.tabs |= slice.tabs << base_ix;
 52        self.text.push_str(slice.text);
 53    }
 54
 55    #[inline(always)]
 56    pub fn as_slice(&self) -> ChunkSlice<'_> {
 57        ChunkSlice {
 58            chars: self.chars,
 59            chars_utf16: self.chars_utf16,
 60            newlines: self.newlines,
 61            tabs: self.tabs,
 62            text: &self.text,
 63        }
 64    }
 65
 66    #[inline(always)]
 67    pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
 68        self.as_slice().slice(range)
 69    }
 70}
 71
/// A borrowed view of a [`Chunk`] (or of part of one): a `&str` plus the matching bitmaps.
///
/// Bit `i` of every bitmap describes byte `i` of `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Set at the byte index where each `char` starts.
    chars: u128,
    /// One set bit per UTF-16 code unit of the text.
    chars_utf16: u128,
    /// Set at the byte index of each `'\n'`.
    newlines: u128,
    /// Set at the byte index of each `'\t'`.
    tabs: u128,
    /// The text covered by this slice.
    text: &'a str,
}
 80
 81impl Into<Chunk> for ChunkSlice<'_> {
 82    fn into(self) -> Chunk {
 83        Chunk {
 84            chars: self.chars,
 85            chars_utf16: self.chars_utf16,
 86            newlines: self.newlines,
 87            tabs: self.tabs,
 88            text: self.text.try_into().unwrap(),
 89        }
 90    }
 91}
 92
 93impl<'a> ChunkSlice<'a> {
 94    #[inline(always)]
 95    pub fn is_empty(&self) -> bool {
 96        self.text.is_empty()
 97    }
 98
 99    #[inline(always)]
100    pub fn is_char_boundary(self, offset: usize) -> bool {
101        self.text.is_char_boundary(offset)
102    }
103
104    #[inline(always)]
105    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
106        if mid == MAX_BASE {
107            let left = self;
108            let right = ChunkSlice {
109                chars: 0,
110                chars_utf16: 0,
111                newlines: 0,
112                tabs: 0,
113                text: "",
114            };
115            (left, right)
116        } else {
117            let mask = (1u128 << mid) - 1;
118            let (left_text, right_text) = self.text.split_at(mid);
119            let left = ChunkSlice {
120                chars: self.chars & mask,
121                chars_utf16: self.chars_utf16 & mask,
122                newlines: self.newlines & mask,
123                tabs: self.tabs & mask,
124                text: left_text,
125            };
126            let right = ChunkSlice {
127                chars: self.chars >> mid,
128                chars_utf16: self.chars_utf16 >> mid,
129                newlines: self.newlines >> mid,
130                tabs: self.tabs >> mid,
131                text: right_text,
132            };
133            (left, right)
134        }
135    }
136
137    #[inline(always)]
138    pub fn slice(self, range: Range<usize>) -> Self {
139        let mask = if range.end == MAX_BASE {
140            u128::MAX
141        } else {
142            (1u128 << range.end) - 1
143        };
144        if range.start == MAX_BASE {
145            Self {
146                chars: 0,
147                chars_utf16: 0,
148                newlines: 0,
149                tabs: 0,
150                text: "",
151            }
152        } else {
153            Self {
154                chars: (self.chars & mask) >> range.start,
155                chars_utf16: (self.chars_utf16 & mask) >> range.start,
156                newlines: (self.newlines & mask) >> range.start,
157                tabs: (self.tabs & mask) >> range.start,
158                text: &self.text[range],
159            }
160        }
161    }
162
163    #[inline(always)]
164    pub fn text_summary(&self) -> TextSummary {
165        let mut chars = 0;
166        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
167        TextSummary {
168            len: self.len(),
169            chars,
170            len_utf16: self.len_utf16(),
171            lines: self.lines(),
172            first_line_chars: self.first_line_chars(),
173            last_line_chars: self.last_line_chars(),
174            last_line_len_utf16: self.last_line_len_utf16(),
175            longest_row,
176            longest_row_chars,
177        }
178    }
179
180    /// Get length in bytes
181    #[inline(always)]
182    pub fn len(&self) -> usize {
183        self.text.len()
184    }
185
186    /// Get length in UTF-16 code units
187    #[inline(always)]
188    pub fn len_utf16(&self) -> OffsetUtf16 {
189        OffsetUtf16(self.chars_utf16.count_ones() as usize)
190    }
191
192    /// Get point representing number of lines and length of last line
193    #[inline(always)]
194    pub fn lines(&self) -> Point {
195        let row = self.newlines.count_ones();
196        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
197        Point::new(row, column)
198    }
199
200    /// Get number of chars in first line
201    #[inline(always)]
202    pub fn first_line_chars(&self) -> u32 {
203        if self.newlines == 0 {
204            self.chars.count_ones()
205        } else {
206            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
207            (self.chars & mask).count_ones()
208        }
209    }
210
211    /// Get number of chars in last line
212    #[inline(always)]
213    pub fn last_line_chars(&self) -> u32 {
214        if self.newlines == 0 {
215            self.chars.count_ones()
216        } else {
217            let mask = !(u128::MAX >> self.newlines.leading_zeros());
218            (self.chars & mask).count_ones()
219        }
220    }
221
222    /// Get number of UTF-16 code units in last line
223    #[inline(always)]
224    pub fn last_line_len_utf16(&self) -> u32 {
225        if self.newlines == 0 {
226            self.chars_utf16.count_ones()
227        } else {
228            let mask = !(u128::MAX >> self.newlines.leading_zeros());
229            (self.chars_utf16 & mask).count_ones()
230        }
231    }
232
233    /// Get the longest row in the chunk and its length in characters.
234    /// Calculate the total number of characters in the chunk along the way.
235    #[inline(always)]
236    pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
237        let mut chars = self.chars;
238        let mut newlines = self.newlines;
239        *total_chars = 0;
240        let mut row = 0;
241        let mut longest_row = 0;
242        let mut longest_row_chars = 0;
243        while newlines > 0 {
244            let newline_ix = newlines.trailing_zeros();
245            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
246            *total_chars += usize::from(row_chars);
247            if row_chars > longest_row_chars {
248                longest_row = row;
249                longest_row_chars = row_chars;
250            }
251
252            newlines >>= newline_ix;
253            newlines >>= 1;
254            chars >>= newline_ix;
255            chars >>= 1;
256            row += 1;
257            *total_chars += 1;
258        }
259
260        let row_chars = chars.count_ones() as u8;
261        *total_chars += usize::from(row_chars);
262        if row_chars > longest_row_chars {
263            (row, row_chars as u32)
264        } else {
265            (longest_row, longest_row_chars as u32)
266        }
267    }
268
269    #[inline(always)]
270    pub fn offset_to_point(&self, offset: usize) -> Point {
271        let mask = if offset == MAX_BASE {
272            u128::MAX
273        } else {
274            (1u128 << offset) - 1
275        };
276        let row = (self.newlines & mask).count_ones();
277        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
278        let column = (offset - newline_ix as usize) as u32;
279        Point::new(row, column)
280    }
281
282    #[inline(always)]
283    pub fn point_to_offset(&self, point: Point) -> usize {
284        if point.row > self.lines().row {
285            debug_panic!(
286                "point {:?} extends beyond rows for string {:?}",
287                point,
288                self.text
289            );
290            return self.len();
291        }
292
293        let row_offset_range = self.offset_range_for_row(point.row);
294        if point.column > row_offset_range.len() as u32 {
295            debug_panic!(
296                "point {:?} extends beyond row for string {:?}",
297                point,
298                self.text
299            );
300            row_offset_range.end
301        } else {
302            row_offset_range.start + point.column as usize
303        }
304    }
305
306    #[inline(always)]
307    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
308        let mask = if offset == MAX_BASE {
309            u128::MAX
310        } else {
311            (1u128 << offset) - 1
312        };
313        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
314    }
315
316    #[inline(always)]
317    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
318        if target.0 == 0 {
319            0
320        } else {
321            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
322            if ix == MAX_BASE {
323                MAX_BASE
324            } else {
325                let utf8_additional_len = cmp::min(
326                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
327                    self.text.len() - ix,
328                );
329                ix + utf8_additional_len
330            }
331        }
332    }
333
334    #[inline(always)]
335    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
336        let mask = if offset == MAX_BASE {
337            u128::MAX
338        } else {
339            (1u128 << offset) - 1
340        };
341        let row = (self.newlines & mask).count_ones();
342        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
343        let column = if newline_ix as usize == MAX_BASE {
344            0
345        } else {
346            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
347        };
348        PointUtf16::new(row, column)
349    }
350
351    #[inline(always)]
352    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
353        self.offset_to_point_utf16(self.point_to_offset(point))
354    }
355
356    #[inline(always)]
357    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
358        let lines = self.lines();
359        if point.row > lines.row {
360            if !clip {
361                debug_panic!(
362                    "point {:?} is beyond this chunk's extent {:?}",
363                    point,
364                    self.text
365                );
366            }
367            return self.len();
368        }
369
370        let row_offset_range = self.offset_range_for_row(point.row);
371        let line = self.slice(row_offset_range.clone());
372        if point.column > line.last_line_len_utf16() {
373            if !clip {
374                debug_panic!(
375                    "point {:?} is beyond the end of the line in chunk {:?}",
376                    point,
377                    self.text
378                );
379            }
380            return line.len();
381        }
382
383        let mut offset = row_offset_range.start;
384        if point.column > 0 {
385            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
386            if !self.text.is_char_boundary(offset) {
387                offset -= 1;
388                while !self.text.is_char_boundary(offset) {
389                    offset -= 1;
390                }
391                if !clip {
392                    debug_panic!(
393                        "point {:?} is within character in chunk {:?}",
394                        point,
395                        self.text,
396                    );
397                }
398            }
399        }
400        offset
401    }
402
403    #[inline(always)]
404    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
405        let max_point = self.lines();
406        if point.0.row > max_point.row {
407            return max_point;
408        }
409
410        let row_offset_range = self.offset_range_for_row(point.0.row);
411        let line = self.slice(row_offset_range);
412        if point.0.column == 0 {
413            Point::new(point.0.row, 0)
414        } else if point.0.column >= line.len_utf16().0 as u32 {
415            Point::new(point.0.row, line.len() as u32)
416        } else {
417            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
418            while !line.text.is_char_boundary(column) {
419                column -= 1;
420            }
421            Point::new(point.0.row, column as u32)
422        }
423    }
424
425    #[inline(always)]
426    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
427        let max_point = self.lines();
428        if point.row > max_point.row {
429            return max_point;
430        }
431
432        let line = self.slice(self.offset_range_for_row(point.row));
433        if point.column == 0 {
434            point
435        } else if point.column >= line.len() as u32 {
436            Point::new(point.row, line.len() as u32)
437        } else {
438            let mut column = point.column as usize;
439            let bytes = line.text.as_bytes();
440            if bytes[column - 1] < 128 && bytes[column] < 128 {
441                return Point::new(point.row, column as u32);
442            }
443
444            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
445            loop {
446                if line.is_char_boundary(column)
447                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
448                {
449                    break;
450                }
451
452                match bias {
453                    Bias::Left => column -= 1,
454                    Bias::Right => column += 1,
455                }
456                grapheme_cursor.set_cursor(column);
457            }
458            Point::new(point.row, column as u32)
459        }
460    }
461
462    #[inline(always)]
463    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
464        let max_point = self.lines();
465        if point.0.row > max_point.row {
466            PointUtf16::new(max_point.row, self.last_line_len_utf16())
467        } else {
468            let line = self.slice(self.offset_range_for_row(point.0.row));
469            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
470            PointUtf16::new(point.0.row, column.0 as u32)
471        }
472    }
473
474    #[inline(always)]
475    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
476        if target == OffsetUtf16::default() {
477            OffsetUtf16::default()
478        } else if target >= self.len_utf16() {
479            self.len_utf16()
480        } else {
481            let mut offset = self.offset_utf16_to_offset(target);
482            while !self.text.is_char_boundary(offset) {
483                if bias == Bias::Left {
484                    offset -= 1;
485                } else {
486                    offset += 1;
487                }
488            }
489            self.offset_to_offset_utf16(offset)
490        }
491    }
492
493    #[inline(always)]
494    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
495        let row_start = if row > 0 {
496            nth_set_bit(self.newlines, row as usize) + 1
497        } else {
498            0
499        };
500        let row_len = if row_start == MAX_BASE {
501            0
502        } else {
503            cmp::min(
504                (self.newlines >> row_start).trailing_zeros(),
505                (self.text.len() - row_start) as u32,
506            )
507        };
508        row_start..row_start + row_len as usize
509    }
510
511    #[inline(always)]
512    pub fn tabs(&self) -> Tabs {
513        Tabs {
514            tabs: self.tabs,
515            chars: self.chars,
516        }
517    }
518}
519
/// Iterator over the tabs of a chunk slice, as returned by [`ChunkSlice::tabs`].
pub struct Tabs {
    /// Bitmap of the tab byte positions that have not been yielded yet.
    tabs: u128,
    /// Bitmap of char start positions, used to turn a tab's byte offset into a char offset.
    chars: u128,
}
524
/// Location of a single tab, as yielded by [`Tabs`].
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Byte index of the tab.
    pub byte_offset: usize,
    /// Number of chars that start before the tab.
    pub char_offset: usize,
}
530
531impl Iterator for Tabs {
532    type Item = TabPosition;
533
534    fn next(&mut self) -> Option<Self::Item> {
535        if self.tabs == 0 {
536            return None;
537        }
538
539        let tab_offset = self.tabs.trailing_zeros() as usize;
540        let chars_mask = (1 << tab_offset) - 1;
541        let char_offset = (self.chars & chars_mask).count_ones() as usize;
542
543        // Since tabs are 1 byte the tab offset is the same as the byte offset
544        let position = TabPosition {
545            byte_offset: tab_offset,
546            char_offset,
547        };
548        // Remove the tab we've just seen
549        self.tabs ^= 1 << tab_offset;
550
551        Some(position)
552    }
553}
554
555/// Finds the n-th bit that is set to 1.
556#[inline(always)]
557fn nth_set_bit(v: u128, n: usize) -> usize {
558    let low = v as u64;
559    let high = (v >> 64) as u64;
560
561    let low_count = low.count_ones() as usize;
562    if n > low_count {
563        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
564    } else {
565        nth_set_bit_u64(low, n as u64) as usize
566    }
567}
568
/// Returns the zero-based position, counted from the least-significant bit, of the `n`-th
/// set bit of `v` (`n` counts from 1).
///
/// The search is branchless: it narrows a window of 64 bits down to a single bit in six
/// halving steps, each driven by a partial population count.
///
/// NOTE(review): this looks like the classic "select the bit position with the given rank"
/// bit trick applied to the bit-reversed input. The result for `n == 0` or for `n` larger
/// than `v.count_ones()` is not established by this file; confirm before relying on it.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // The selection below ranks bits starting from the most-significant end, so reverse the
    // input to make it rank the original bits from the least-significant end.
    let v = v.reverse_bits();
    // Exclusive upper bound (in the reversed value) of the window that contains the target.
    let mut s: u64 = 64;

    // Parallel bit count intermediates
    // a, b, c and d hold the population counts of every 2-, 4-, 8- and 16-bit field of `v`.
    let a = v - ((v >> 1) & (u64::MAX / 3));
    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
    let d = (c + (c >> 8)) & (u64::MAX / 0x101);

    // Branchless select
    // Each step reads `t`, the number of set bits in the upper half of the current window.
    // Bit 8 of `t - n` serves as a borrow flag for `t < n`: when it is set the target lies in
    // the lower half, so the window's upper bound drops by the half's width and the bits
    // that were skipped are subtracted from `n`.
    let t = (d >> 32) + (d >> 48);
    s -= (t.wrapping_sub(n) & 256) >> 3;
    n -= t & (t.wrapping_sub(n) >> 8);

    // Halves of 16 bits.
    let t = (d >> (s - 16)) & 0xff;
    s -= (t.wrapping_sub(n) & 256) >> 4;
    n -= t & (t.wrapping_sub(n) >> 8);

    // Halves of 8 bits.
    let t = (c >> (s - 8)) & 0xf;
    s -= (t.wrapping_sub(n) & 256) >> 5;
    n -= t & (t.wrapping_sub(n) >> 8);

    // Halves of 4 bits.
    let t = (b >> (s - 4)) & 0x7;
    s -= (t.wrapping_sub(n) & 256) >> 6;
    n -= t & (t.wrapping_sub(n) >> 8);

    // Halves of 2 bits.
    let t = (a >> (s - 2)) & 0x3;
    s -= (t.wrapping_sub(n) & 256) >> 7;
    n -= t & (t.wrapping_sub(n) >> 8);

    // Single bits.
    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // The target sits at bit `s - 1` of the reversed value, i.e. at bit `64 - s` of the
    // original one.
    65 - s - 1
}
606
607#[cfg(test)]
608mod tests {
609    use super::*;
610    use rand::prelude::*;
611    use util::RandomCharIter;
612
613    #[gpui::test(iterations = 100)]
614    fn test_random_chunks(mut rng: StdRng) {
615        let chunk_len = rng.random_range(0..=MAX_BASE);
616        let text = RandomCharIter::new(&mut rng)
617            .take(chunk_len)
618            .collect::<String>();
619        let mut ix = chunk_len;
620        while !text.is_char_boundary(ix) {
621            ix -= 1;
622        }
623        let text = &text[..ix];
624
625        log::info!("Chunk: {:?}", text);
626        let chunk = Chunk::new(text);
627        verify_chunk(chunk.as_slice(), text);
628
629        for _ in 0..10 {
630            let mut start = rng.random_range(0..=chunk.text.len());
631            let mut end = rng.random_range(start..=chunk.text.len());
632            while !chunk.text.is_char_boundary(start) {
633                start -= 1;
634            }
635            while !chunk.text.is_char_boundary(end) {
636                end -= 1;
637            }
638            let range = start..end;
639            log::info!("Range: {:?}", range);
640            let text_slice = &text[range.clone()];
641            let chunk_slice = chunk.slice(range);
642            verify_chunk(chunk_slice, text_slice);
643        }
644    }
645
646    #[gpui::test(iterations = 1000)]
647    fn test_nth_set_bit_random(mut rng: StdRng) {
648        let set_count = rng.random_range(0..=128);
649        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
650        set_bits.sort();
651        let mut n = 0;
652        for ix in set_bits.iter().copied() {
653            n |= 1 << ix;
654        }
655
656        for (mut ix, position) in set_bits.into_iter().enumerate() {
657            ix += 1;
658            assert_eq!(
659                nth_set_bit(n, ix),
660                position,
661                "nth_set_bit({:0128b}, {})",
662                n,
663                ix
664            );
665        }
666    }
667
    /// Checks the conversion, clipping, length, line and tab methods of `chunk` against
    /// values computed directly from `text`, which is the text `chunk` was built from.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Position of the current char in every coordinate space, advanced as `text` is walked.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Conversions between the coordinate spaces at the start of the char.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // The start of a char is expected to clip to itself in both directions.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Byte positions inside a multi-byte char clip to its start (left) or end (right).
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // UTF-16 positions between the two code units of a surrogate pair.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the running positions past the current char.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same conversions and clips once more, at the position just past the last char.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').next_back().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        // The reference scan only replaces the maximum on a strictly longer row, so the
        // earliest of several equally long rows is the expected answer.
        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        // Tabs must be reported in text order, with both their byte and char offsets.
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
945}