1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
/// Base size constant: 6 when compiled for tests, 64 otherwise.
// NOTE(review): presumably the small test value exists so short random inputs
// still hit chunk-capacity edge cases — confirm with the callers of this constant.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Capacity in bytes of [`Chunk::text`]; always twice [`MIN_BASE`].
///
/// In non-test builds this is 128, the bit width of the `u128` bitmaps that
/// index a chunk's text.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
10
/// A fixed-capacity piece of UTF-8 text together with bitmaps that index it.
///
/// Bit `i` of every bitmap describes the byte at offset `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Set at every byte offset where a `char` starts.
    chars: u128,
    /// Set at the first byte of every `char`, and additionally at the following
    /// byte for chars that need two UTF-16 code units, so the number of set
    /// bits equals the text's length in UTF-16 code units.
    chars_utf16: u128,
    /// Set at the byte offset of every `'\n'`.
    newlines: u128,
    /// Set at the byte offset of every `'\t'`.
    tabs: u128,
    /// The text itself, holding at most `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
19
20impl Chunk {
21 #[inline(always)]
22 pub fn new(text: &str) -> Self {
23 let mut this = Chunk::default();
24 this.push_str(text);
25 this
26 }
27
28 #[inline(always)]
29 pub fn push_str(&mut self, text: &str) {
30 for (char_ix, c) in text.char_indices() {
31 let ix = self.text.len() + char_ix;
32 self.chars |= 1 << ix;
33 self.chars_utf16 |= 1 << ix;
34 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
35 self.newlines |= ((c == '\n') as u128) << ix;
36 self.tabs |= ((c == '\t') as u128) << ix;
37 }
38 self.text.push_str(text);
39 }
40
41 #[inline(always)]
42 pub fn append(&mut self, slice: ChunkSlice) {
43 if slice.is_empty() {
44 return;
45 };
46
47 let base_ix = self.text.len();
48 self.chars |= slice.chars << base_ix;
49 self.chars_utf16 |= slice.chars_utf16 << base_ix;
50 self.newlines |= slice.newlines << base_ix;
51 self.tabs |= slice.tabs << base_ix;
52 self.text.push_str(&slice.text);
53 }
54
55 #[inline(always)]
56 pub fn as_slice(&self) -> ChunkSlice {
57 ChunkSlice {
58 chars: self.chars,
59 chars_utf16: self.chars_utf16,
60 newlines: self.newlines,
61 tabs: self.tabs,
62 text: &self.text,
63 }
64 }
65
66 #[inline(always)]
67 pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
68 self.as_slice().slice(range)
69 }
70}
71
/// A borrowed view of all or part of a [`Chunk`]: a `&str` plus the matching
/// bitmaps, shifted so that bit 0 describes the first byte of `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Set at every byte offset of `text` where a `char` starts.
    chars: u128,
    /// One set bit per UTF-16 code unit of `text`.
    chars_utf16: u128,
    /// Set at the byte offset of every `'\n'` in `text`.
    newlines: u128,
    /// Set at the byte offset of every `'\t'` in `text`.
    tabs: u128,
    /// The borrowed text this slice describes.
    text: &'a str,
}
80
81impl<'a> Into<Chunk> for ChunkSlice<'a> {
82 fn into(self) -> Chunk {
83 Chunk {
84 chars: self.chars,
85 chars_utf16: self.chars_utf16,
86 newlines: self.newlines,
87 tabs: self.tabs,
88 text: self.text.try_into().unwrap(),
89 }
90 }
91}
92
93impl<'a> ChunkSlice<'a> {
94 #[inline(always)]
95 pub fn is_empty(self) -> bool {
96 self.text.is_empty()
97 }
98
99 #[inline(always)]
100 pub fn is_char_boundary(self, offset: usize) -> bool {
101 self.text.is_char_boundary(offset)
102 }
103
104 #[inline(always)]
105 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
106 if mid == MAX_BASE {
107 let left = self;
108 let right = ChunkSlice {
109 chars: 0,
110 chars_utf16: 0,
111 newlines: 0,
112 tabs: 0,
113 text: "",
114 };
115 (left, right)
116 } else {
117 let mask = (1u128 << mid) - 1;
118 let (left_text, right_text) = self.text.split_at(mid);
119 let left = ChunkSlice {
120 chars: self.chars & mask,
121 chars_utf16: self.chars_utf16 & mask,
122 newlines: self.newlines & mask,
123 tabs: self.tabs & mask,
124 text: left_text,
125 };
126 let right = ChunkSlice {
127 chars: self.chars >> mid,
128 chars_utf16: self.chars_utf16 >> mid,
129 newlines: self.newlines >> mid,
130 tabs: self.tabs >> mid,
131 text: right_text,
132 };
133 (left, right)
134 }
135 }
136
137 #[inline(always)]
138 pub fn slice(self, range: Range<usize>) -> Self {
139 let mask = if range.end == MAX_BASE {
140 u128::MAX
141 } else {
142 (1u128 << range.end) - 1
143 };
144 if range.start == MAX_BASE {
145 Self {
146 chars: 0,
147 chars_utf16: 0,
148 newlines: 0,
149 tabs: 0,
150 text: "",
151 }
152 } else {
153 Self {
154 chars: (self.chars & mask) >> range.start,
155 chars_utf16: (self.chars_utf16 & mask) >> range.start,
156 newlines: (self.newlines & mask) >> range.start,
157 tabs: (self.tabs & mask) >> range.start,
158 text: &self.text[range],
159 }
160 }
161 }
162
163 #[inline(always)]
164 pub fn text_summary(&self) -> TextSummary {
165 let (longest_row, longest_row_chars) = self.longest_row();
166 TextSummary {
167 len: self.len(),
168 len_utf16: self.len_utf16(),
169 lines: self.lines(),
170 first_line_chars: self.first_line_chars(),
171 last_line_chars: self.last_line_chars(),
172 last_line_len_utf16: self.last_line_len_utf16(),
173 longest_row,
174 longest_row_chars,
175 }
176 }
177
178 /// Get length in bytes
179 #[inline(always)]
180 pub fn len(&self) -> usize {
181 self.text.len()
182 }
183
184 /// Get length in UTF-16 code units
185 #[inline(always)]
186 pub fn len_utf16(&self) -> OffsetUtf16 {
187 OffsetUtf16(self.chars_utf16.count_ones() as usize)
188 }
189
190 /// Get point representing number of lines and length of last line
191 #[inline(always)]
192 pub fn lines(&self) -> Point {
193 let row = self.newlines.count_ones();
194 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
195 Point::new(row, column)
196 }
197
198 /// Get number of chars in first line
199 #[inline(always)]
200 pub fn first_line_chars(&self) -> u32 {
201 if self.newlines == 0 {
202 self.chars.count_ones()
203 } else {
204 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
205 (self.chars & mask).count_ones()
206 }
207 }
208
209 /// Get number of chars in last line
210 #[inline(always)]
211 pub fn last_line_chars(&self) -> u32 {
212 if self.newlines == 0 {
213 self.chars.count_ones()
214 } else {
215 let mask = !(u128::MAX >> self.newlines.leading_zeros());
216 (self.chars & mask).count_ones()
217 }
218 }
219
220 /// Get number of UTF-16 code units in last line
221 #[inline(always)]
222 pub fn last_line_len_utf16(&self) -> u32 {
223 if self.newlines == 0 {
224 self.chars_utf16.count_ones()
225 } else {
226 let mask = !(u128::MAX >> self.newlines.leading_zeros());
227 (self.chars_utf16 & mask).count_ones()
228 }
229 }
230
231 /// Get the longest row in the chunk and its length in characters.
232 #[inline(always)]
233 pub fn longest_row(&self) -> (u32, u32) {
234 let mut chars = self.chars;
235 let mut newlines = self.newlines;
236 let mut row = 0;
237 let mut longest_row = 0;
238 let mut longest_row_chars = 0;
239 while newlines > 0 {
240 let newline_ix = newlines.trailing_zeros();
241 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
242 if row_chars > longest_row_chars {
243 longest_row = row;
244 longest_row_chars = row_chars;
245 }
246
247 newlines >>= newline_ix;
248 newlines >>= 1;
249 chars >>= newline_ix;
250 chars >>= 1;
251 row += 1;
252 }
253
254 let row_chars = chars.count_ones() as u8;
255 if row_chars > longest_row_chars {
256 (row, row_chars as u32)
257 } else {
258 (longest_row, longest_row_chars as u32)
259 }
260 }
261
262 #[inline(always)]
263 pub fn offset_to_point(&self, offset: usize) -> Point {
264 let mask = if offset == MAX_BASE {
265 u128::MAX
266 } else {
267 (1u128 << offset) - 1
268 };
269 let row = (self.newlines & mask).count_ones();
270 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
271 let column = (offset - newline_ix as usize) as u32;
272 Point::new(row, column)
273 }
274
275 #[inline(always)]
276 pub fn point_to_offset(&self, point: Point) -> usize {
277 if point.row > self.lines().row {
278 debug_panic!(
279 "point {:?} extends beyond rows for string {:?}",
280 point,
281 self.text
282 );
283 return self.len();
284 }
285
286 let row_offset_range = self.offset_range_for_row(point.row);
287 if point.column > row_offset_range.len() as u32 {
288 debug_panic!(
289 "point {:?} extends beyond row for string {:?}",
290 point,
291 self.text
292 );
293 row_offset_range.end
294 } else {
295 row_offset_range.start + point.column as usize
296 }
297 }
298
299 #[inline(always)]
300 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
301 let mask = if offset == MAX_BASE {
302 u128::MAX
303 } else {
304 (1u128 << offset) - 1
305 };
306 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
307 }
308
309 #[inline(always)]
310 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
311 if target.0 == 0 {
312 0
313 } else {
314 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
315 if ix == MAX_BASE {
316 MAX_BASE
317 } else {
318 let utf8_additional_len = cmp::min(
319 (self.chars_utf16 >> ix).trailing_zeros() as usize,
320 self.text.len() - ix,
321 );
322 ix + utf8_additional_len
323 }
324 }
325 }
326
327 #[inline(always)]
328 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
329 let mask = if offset == MAX_BASE {
330 u128::MAX
331 } else {
332 (1u128 << offset) - 1
333 };
334 let row = (self.newlines & mask).count_ones();
335 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
336 let column = if newline_ix as usize == MAX_BASE {
337 0
338 } else {
339 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
340 };
341 PointUtf16::new(row, column)
342 }
343
344 #[inline(always)]
345 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
346 self.offset_to_point_utf16(self.point_to_offset(point))
347 }
348
349 #[inline(always)]
350 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
351 let lines = self.lines();
352 if point.row > lines.row {
353 if !clip {
354 debug_panic!(
355 "point {:?} is beyond this chunk's extent {:?}",
356 point,
357 self.text
358 );
359 }
360 return self.len();
361 }
362
363 let row_offset_range = self.offset_range_for_row(point.row);
364 let line = self.slice(row_offset_range.clone());
365 if point.column > line.last_line_len_utf16() {
366 if !clip {
367 debug_panic!(
368 "point {:?} is beyond the end of the line in chunk {:?}",
369 point,
370 self.text
371 );
372 }
373 return line.len();
374 }
375
376 let mut offset = row_offset_range.start;
377 if point.column > 0 {
378 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
379 if !self.text.is_char_boundary(offset) {
380 offset -= 1;
381 while !self.text.is_char_boundary(offset) {
382 offset -= 1;
383 }
384 if !clip {
385 debug_panic!(
386 "point {:?} is within character in chunk {:?}",
387 point,
388 self.text,
389 );
390 }
391 }
392 }
393 offset
394 }
395
396 #[inline(always)]
397 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
398 let max_point = self.lines();
399 if point.0.row > max_point.row {
400 return max_point;
401 }
402
403 let row_offset_range = self.offset_range_for_row(point.0.row);
404 let line = self.slice(row_offset_range.clone());
405 if point.0.column == 0 {
406 Point::new(point.0.row, 0)
407 } else if point.0.column >= line.len_utf16().0 as u32 {
408 Point::new(point.0.row, line.len() as u32)
409 } else {
410 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
411 while !line.text.is_char_boundary(column) {
412 column -= 1;
413 }
414 Point::new(point.0.row, column as u32)
415 }
416 }
417
418 #[inline(always)]
419 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
420 let max_point = self.lines();
421 if point.row > max_point.row {
422 return max_point;
423 }
424
425 let line = self.slice(self.offset_range_for_row(point.row));
426 if point.column == 0 {
427 point
428 } else if point.column >= line.len() as u32 {
429 Point::new(point.row, line.len() as u32)
430 } else {
431 let mut column = point.column as usize;
432 let bytes = line.text.as_bytes();
433 if bytes[column - 1] < 128 && bytes[column] < 128 {
434 return Point::new(point.row, column as u32);
435 }
436
437 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
438 loop {
439 if line.is_char_boundary(column)
440 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
441 {
442 break;
443 }
444
445 match bias {
446 Bias::Left => column -= 1,
447 Bias::Right => column += 1,
448 }
449 grapheme_cursor.set_cursor(column);
450 }
451 Point::new(point.row, column as u32)
452 }
453 }
454
455 #[inline(always)]
456 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
457 let max_point = self.lines();
458 if point.0.row > max_point.row {
459 PointUtf16::new(max_point.row, self.last_line_len_utf16())
460 } else {
461 let line = self.slice(self.offset_range_for_row(point.0.row));
462 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
463 PointUtf16::new(point.0.row, column.0 as u32)
464 }
465 }
466
467 #[inline(always)]
468 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
469 if target == OffsetUtf16::default() {
470 OffsetUtf16::default()
471 } else if target >= self.len_utf16() {
472 self.len_utf16()
473 } else {
474 let mut offset = self.offset_utf16_to_offset(target);
475 while !self.text.is_char_boundary(offset) {
476 if bias == Bias::Left {
477 offset -= 1;
478 } else {
479 offset += 1;
480 }
481 }
482 self.offset_to_offset_utf16(offset)
483 }
484 }
485
486 #[inline(always)]
487 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
488 let row_start = if row > 0 {
489 nth_set_bit(self.newlines, row as usize) + 1
490 } else {
491 0
492 };
493 let row_len = if row_start == MAX_BASE {
494 0
495 } else {
496 cmp::min(
497 (self.newlines >> row_start).trailing_zeros(),
498 (self.text.len() - row_start) as u32,
499 )
500 };
501 row_start..row_start + row_len as usize
502 }
503
504 #[inline(always)]
505 pub fn tabs(&self) -> Tabs {
506 Tabs {
507 byte_offset: 0,
508 char_offset: 0,
509 tabs: self.tabs,
510 chars: self.chars,
511 }
512 }
513}
514
/// Iterator over the tab characters of a [`ChunkSlice`], created by
/// `ChunkSlice::tabs`.
pub struct Tabs {
    /// Byte offset, within the original slice, of the first byte not yet scanned.
    byte_offset: usize,
    /// Number of chars already scanned.
    char_offset: usize,
    /// Remaining tab bitmap, shifted so bit 0 describes the byte at `byte_offset`.
    tabs: u128,
    /// Remaining char-start bitmap, shifted the same way as `tabs`.
    chars: u128,
}
521
/// Location of one tab character, as yielded by [`Tabs`].
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Byte offset of the tab within the slice.
    pub byte_offset: usize,
    /// Number of chars that precede the tab within the slice.
    pub char_offset: usize,
}
527
528impl Iterator for Tabs {
529 type Item = TabPosition;
530
531 fn next(&mut self) -> Option<Self::Item> {
532 if self.tabs == 0 {
533 return None;
534 }
535
536 let tab_offset = self.tabs.trailing_zeros() as usize;
537 let chars_mask = (1 << tab_offset) - 1;
538 let char_offset = (self.chars & chars_mask).count_ones() as usize;
539 self.byte_offset += tab_offset;
540 self.char_offset += char_offset;
541 let position = TabPosition {
542 byte_offset: self.byte_offset,
543 char_offset: self.char_offset,
544 };
545
546 self.byte_offset += 1;
547 self.char_offset += 1;
548 if self.byte_offset == MAX_BASE {
549 self.tabs = 0;
550 } else {
551 self.tabs >>= tab_offset + 1;
552 self.chars >>= tab_offset + 1;
553 }
554
555 Some(position)
556 }
557}
558
559/// Finds the n-th bit that is set to 1.
560#[inline(always)]
561fn nth_set_bit(v: u128, n: usize) -> usize {
562 let low = v as u64;
563 let high = (v >> 64) as u64;
564
565 let low_count = low.count_ones() as usize;
566 if n > low_count {
567 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
568 } else {
569 nth_set_bit_u64(low, n as u64) as usize
570 }
571}
572
/// Returns the zero-based position of the `n`-th set bit of `v`, counting set
/// bits from the least-significant end and starting at `n == 1`.
///
/// Branch-free: partial population counts are built for progressively wider
/// bit groups, then the search window is halved step by step.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // Masks for the parallel population count.
    const ALTERNATE_BITS: u64 = u64::MAX / 3; // 0x5555…
    const ALTERNATE_PAIRS: u64 = u64::MAX / 5; // 0x3333…
    const ALTERNATE_NIBBLES: u64 = u64::MAX / 0x11; // 0x0f0f…
    const ALTERNATE_BYTES: u64 = u64::MAX / 0x101; // 0x00ff…

    // The selection below ranks bits from the most-significant end, so the
    // input is mirrored first.
    let mirrored = v.reverse_bits();

    // Ones per 2-, 4-, 8- and 16-bit group.
    let per_2 = mirrored - ((mirrored >> 1) & ALTERNATE_BITS);
    let per_4 = (per_2 & ALTERNATE_PAIRS) + ((per_2 >> 2) & ALTERNATE_PAIRS);
    let per_8 = (per_4 + (per_4 >> 4)) & ALTERNATE_NIBBLES;
    let per_16 = (per_8 + (per_8 >> 8)) & ALTERNATE_BYTES;

    let mut pos: u64 = 64;

    // First step: decide between the upper and lower 32 bits.
    let upper_ones = (per_16 >> 32) + (per_16 >> 48);
    let diff = upper_ones.wrapping_sub(n);
    pos -= (diff & 256) >> 3;
    n -= upper_ones & (diff >> 8);

    // Remaining steps: halve the window from 16 bits down to 2 bits.
    for (counts, width, field_mask) in [
        (per_16, 16, 0xff),
        (per_8, 8, 0xf),
        (per_4, 4, 0x7),
        (per_2, 2, 0x3),
    ] {
        let ones = (counts >> (pos - width)) & field_mask;
        let diff = ones.wrapping_sub(n);
        // Bit 8 of the difference is set exactly when the target lies beyond
        // this group; in that case step over it.
        pos -= ((diff & 256) >> 8) * width;
        n -= ones & (diff >> 8);
    }

    // Final step: a single bit.
    let ones = (mirrored >> (pos - 1)) & 0x1;
    pos -= (ones.wrapping_sub(n) & 256) >> 8;

    65 - pos - 1
}
610
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use util::RandomCharIter;

    /// Builds a random chunk, then checks the whole chunk and ten random
    /// sub-slices of it against a straightforward scan of the text.
    #[gpui::test(iterations = 100)]
    fn test_random_chunks(mut rng: StdRng) {
        let chunk_len = rng.gen_range(0..=MAX_BASE);
        let text = RandomCharIter::new(&mut rng)
            .take(chunk_len)
            .collect::<String>();
        // `take` counts chars, so the string may be longer than `chunk_len`
        // bytes; cut it back to the nearest char boundary at or below that.
        let mut ix = chunk_len;
        while !text.is_char_boundary(ix) {
            ix -= 1;
        }
        let text = &text[..ix];

        log::info!("Chunk: {:?}", text);
        let chunk = Chunk::new(&text);
        verify_chunk(chunk.as_slice(), text);

        for _ in 0..10 {
            // Pick a random byte range and snap both ends back to char boundaries.
            let mut start = rng.gen_range(0..=chunk.text.len());
            let mut end = rng.gen_range(start..=chunk.text.len());
            while !chunk.text.is_char_boundary(start) {
                start -= 1;
            }
            while !chunk.text.is_char_boundary(end) {
                end -= 1;
            }
            let range = start..end;
            log::info!("Range: {:?}", range);
            let text_slice = &text[range.clone()];
            let chunk_slice = chunk.slice(range);
            verify_chunk(chunk_slice, text_slice);
        }
    }

    /// Checks `nth_set_bit` against a randomly chosen, sorted set of bit positions.
    #[gpui::test(iterations = 1000)]
    fn test_nth_set_bit_random(mut rng: StdRng) {
        let set_count = rng.gen_range(0..=128);
        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
        set_bits.sort();
        let mut n = 0;
        for ix in set_bits.iter().copied() {
            n |= 1 << ix;
        }

        for (mut ix, position) in set_bits.into_iter().enumerate() {
            // `nth_set_bit` is queried with a one-based rank.
            ix += 1;
            assert_eq!(
                nth_set_bit(n, ix),
                position,
                "nth_set_bit({:0128b}, {})",
                n,
                ix
            );
        }
    }

    /// Walks `text` char by char, tracking the expected offsets and points, and
    /// asserts that every conversion and clipping method of `chunk` agrees.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Running expected position, in each coordinate system.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Conversions at the start of the current char.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A point at the start of a char is expected to clip to itself.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Points inside a multi-byte char must clip to one of its ends.
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // UTF-16 positions between the two code units of one char.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the expected position past the current char.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same checks once more at the very end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        // Reference scan; the strict `>` keeps the earliest row on ties.
        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // Account for the final row, which has no trailing newline.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
}