1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
/// Base size from which the chunk capacity is derived.
///
/// It is deliberately tiny when compiled with `cfg!(test)` and 64 otherwise.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Twice [`MIN_BASE`]; used as the byte capacity of a chunk's text.
///
/// Outside of test builds this is 128, i.e. the number of bits in the `u128`
/// bitmaps that accompany the text.
pub(crate) const MAX_BASE: usize = 2 * MIN_BASE;
10
11#[derive(Clone, Debug, Default)]
12pub struct Chunk {
13 chars: u128,
14 chars_utf16: u128,
15 newlines: u128,
16 pub tabs: u128,
17 pub text: ArrayString<MAX_BASE>,
18}
19
20impl Chunk {
21 #[inline(always)]
22 pub fn new(text: &str) -> Self {
23 let mut this = Chunk::default();
24 this.push_str(text);
25 this
26 }
27
28 #[inline(always)]
29 pub fn push_str(&mut self, text: &str) {
30 for (char_ix, c) in text.char_indices() {
31 let ix = self.text.len() + char_ix;
32 self.chars |= 1 << ix;
33 self.chars_utf16 |= 1 << ix;
34 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
35 self.newlines |= ((c == '\n') as u128) << ix;
36 self.tabs |= ((c == '\t') as u128) << ix;
37 }
38 self.text.push_str(text);
39 }
40
41 #[inline(always)]
42 pub fn append(&mut self, slice: ChunkSlice) {
43 if slice.is_empty() {
44 return;
45 };
46
47 let base_ix = self.text.len();
48 self.chars |= slice.chars << base_ix;
49 self.chars_utf16 |= slice.chars_utf16 << base_ix;
50 self.newlines |= slice.newlines << base_ix;
51 self.tabs |= slice.tabs << base_ix;
52 self.text.push_str(slice.text);
53 }
54
55 #[inline(always)]
56 pub fn as_slice(&self) -> ChunkSlice<'_> {
57 ChunkSlice {
58 chars: self.chars,
59 chars_utf16: self.chars_utf16,
60 newlines: self.newlines,
61 tabs: self.tabs,
62 text: &self.text,
63 }
64 }
65
66 #[inline(always)]
67 pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
68 self.as_slice().slice(range)
69 }
70
71 #[inline(always)]
72 pub fn chars(&self) -> u128 {
73 self.chars
74 }
75}
76
/// A borrowed view of some text together with bitmaps describing it.
///
/// Bit `i` of every bitmap refers to byte `i` of `text`.
#[derive(Debug, Clone, Copy)]
pub struct ChunkSlice<'a> {
    /// Bitmap of character start offsets.
    chars: u128,
    /// Bitmap with one bit per UTF-16 code unit.
    chars_utf16: u128,
    /// Bitmap of `'\n'` offsets.
    newlines: u128,
    /// Bitmap of `'\t'` offsets.
    tabs: u128,
    /// The borrowed UTF-8 text.
    text: &'a str,
}
85
86impl Into<Chunk> for ChunkSlice<'_> {
87 fn into(self) -> Chunk {
88 Chunk {
89 chars: self.chars,
90 chars_utf16: self.chars_utf16,
91 newlines: self.newlines,
92 tabs: self.tabs,
93 text: self.text.try_into().unwrap(),
94 }
95 }
96}
97
98impl<'a> ChunkSlice<'a> {
99 #[inline(always)]
100 pub fn is_empty(&self) -> bool {
101 self.text.is_empty()
102 }
103
104 #[inline(always)]
105 pub fn is_char_boundary(self, offset: usize) -> bool {
106 self.text.is_char_boundary(offset)
107 }
108
109 #[inline(always)]
110 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
111 if mid == MAX_BASE {
112 let left = self;
113 let right = ChunkSlice {
114 chars: 0,
115 chars_utf16: 0,
116 newlines: 0,
117 tabs: 0,
118 text: "",
119 };
120 (left, right)
121 } else {
122 let mask = (1u128 << mid) - 1;
123 let (left_text, right_text) = self.text.split_at(mid);
124 let left = ChunkSlice {
125 chars: self.chars & mask,
126 chars_utf16: self.chars_utf16 & mask,
127 newlines: self.newlines & mask,
128 tabs: self.tabs & mask,
129 text: left_text,
130 };
131 let right = ChunkSlice {
132 chars: self.chars >> mid,
133 chars_utf16: self.chars_utf16 >> mid,
134 newlines: self.newlines >> mid,
135 tabs: self.tabs >> mid,
136 text: right_text,
137 };
138 (left, right)
139 }
140 }
141
142 #[inline(always)]
143 pub fn slice(self, range: Range<usize>) -> Self {
144 let mask = if range.end == MAX_BASE {
145 u128::MAX
146 } else {
147 (1u128 << range.end) - 1
148 };
149 if range.start == MAX_BASE {
150 Self {
151 chars: 0,
152 chars_utf16: 0,
153 newlines: 0,
154 tabs: 0,
155 text: "",
156 }
157 } else {
158 Self {
159 chars: (self.chars & mask) >> range.start,
160 chars_utf16: (self.chars_utf16 & mask) >> range.start,
161 newlines: (self.newlines & mask) >> range.start,
162 tabs: (self.tabs & mask) >> range.start,
163 text: &self.text[range],
164 }
165 }
166 }
167
168 #[inline(always)]
169 pub fn text_summary(&self) -> TextSummary {
170 let mut chars = 0;
171 let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
172 TextSummary {
173 len: self.len(),
174 chars,
175 len_utf16: self.len_utf16(),
176 lines: self.lines(),
177 first_line_chars: self.first_line_chars(),
178 last_line_chars: self.last_line_chars(),
179 last_line_len_utf16: self.last_line_len_utf16(),
180 longest_row,
181 longest_row_chars,
182 }
183 }
184
185 /// Get length in bytes
186 #[inline(always)]
187 pub fn len(&self) -> usize {
188 self.text.len()
189 }
190
191 /// Get length in UTF-16 code units
192 #[inline(always)]
193 pub fn len_utf16(&self) -> OffsetUtf16 {
194 OffsetUtf16(self.chars_utf16.count_ones() as usize)
195 }
196
197 /// Get point representing number of lines and length of last line
198 #[inline(always)]
199 pub fn lines(&self) -> Point {
200 let row = self.newlines.count_ones();
201 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
202 Point::new(row, column)
203 }
204
205 /// Get number of chars in first line
206 #[inline(always)]
207 pub fn first_line_chars(&self) -> u32 {
208 if self.newlines == 0 {
209 self.chars.count_ones()
210 } else {
211 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
212 (self.chars & mask).count_ones()
213 }
214 }
215
216 /// Get number of chars in last line
217 #[inline(always)]
218 pub fn last_line_chars(&self) -> u32 {
219 if self.newlines == 0 {
220 self.chars.count_ones()
221 } else {
222 let mask = !(u128::MAX >> self.newlines.leading_zeros());
223 (self.chars & mask).count_ones()
224 }
225 }
226
227 /// Get number of UTF-16 code units in last line
228 #[inline(always)]
229 pub fn last_line_len_utf16(&self) -> u32 {
230 if self.newlines == 0 {
231 self.chars_utf16.count_ones()
232 } else {
233 let mask = !(u128::MAX >> self.newlines.leading_zeros());
234 (self.chars_utf16 & mask).count_ones()
235 }
236 }
237
238 /// Get the longest row in the chunk and its length in characters.
239 /// Calculate the total number of characters in the chunk along the way.
240 #[inline(always)]
241 pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
242 let mut chars = self.chars;
243 let mut newlines = self.newlines;
244 *total_chars = 0;
245 let mut row = 0;
246 let mut longest_row = 0;
247 let mut longest_row_chars = 0;
248 while newlines > 0 {
249 let newline_ix = newlines.trailing_zeros();
250 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
251 *total_chars += usize::from(row_chars);
252 if row_chars > longest_row_chars {
253 longest_row = row;
254 longest_row_chars = row_chars;
255 }
256
257 newlines >>= newline_ix;
258 newlines >>= 1;
259 chars >>= newline_ix;
260 chars >>= 1;
261 row += 1;
262 *total_chars += 1;
263 }
264
265 let row_chars = chars.count_ones() as u8;
266 *total_chars += usize::from(row_chars);
267 if row_chars > longest_row_chars {
268 (row, row_chars as u32)
269 } else {
270 (longest_row, longest_row_chars as u32)
271 }
272 }
273
274 #[inline(always)]
275 pub fn offset_to_point(&self, offset: usize) -> Point {
276 let mask = if offset == MAX_BASE {
277 u128::MAX
278 } else {
279 (1u128 << offset) - 1
280 };
281 let row = (self.newlines & mask).count_ones();
282 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
283 let column = (offset - newline_ix as usize) as u32;
284 Point::new(row, column)
285 }
286
287 #[inline(always)]
288 pub fn point_to_offset(&self, point: Point) -> usize {
289 if point.row > self.lines().row {
290 debug_panic!(
291 "point {:?} extends beyond rows for string {:?}",
292 point,
293 self.text
294 );
295 return self.len();
296 }
297
298 let row_offset_range = self.offset_range_for_row(point.row);
299 if point.column > row_offset_range.len() as u32 {
300 debug_panic!(
301 "point {:?} extends beyond row for string {:?}",
302 point,
303 self.text
304 );
305 row_offset_range.end
306 } else {
307 row_offset_range.start + point.column as usize
308 }
309 }
310
311 #[inline(always)]
312 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
313 let mask = if offset == MAX_BASE {
314 u128::MAX
315 } else {
316 (1u128 << offset) - 1
317 };
318 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
319 }
320
321 #[inline(always)]
322 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
323 if target.0 == 0 {
324 0
325 } else {
326 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
327 if ix == MAX_BASE {
328 MAX_BASE
329 } else {
330 let utf8_additional_len = cmp::min(
331 (self.chars_utf16 >> ix).trailing_zeros() as usize,
332 self.text.len() - ix,
333 );
334 ix + utf8_additional_len
335 }
336 }
337 }
338
339 #[inline(always)]
340 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
341 let mask = if offset == MAX_BASE {
342 u128::MAX
343 } else {
344 (1u128 << offset) - 1
345 };
346 let row = (self.newlines & mask).count_ones();
347 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
348 let column = if newline_ix as usize == MAX_BASE {
349 0
350 } else {
351 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
352 };
353 PointUtf16::new(row, column)
354 }
355
356 #[inline(always)]
357 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
358 self.offset_to_point_utf16(self.point_to_offset(point))
359 }
360
361 #[inline(always)]
362 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
363 let lines = self.lines();
364 if point.row > lines.row {
365 if !clip {
366 debug_panic!(
367 "point {:?} is beyond this chunk's extent {:?}",
368 point,
369 self.text
370 );
371 }
372 return self.len();
373 }
374
375 let row_offset_range = self.offset_range_for_row(point.row);
376 let line = self.slice(row_offset_range.clone());
377 if point.column > line.last_line_len_utf16() {
378 if !clip {
379 debug_panic!(
380 "point {:?} is beyond the end of the line in chunk {:?}",
381 point,
382 self.text
383 );
384 }
385 return line.len();
386 }
387
388 let mut offset = row_offset_range.start;
389 if point.column > 0 {
390 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
391 if !self.text.is_char_boundary(offset) {
392 offset -= 1;
393 while !self.text.is_char_boundary(offset) {
394 offset -= 1;
395 }
396 if !clip {
397 debug_panic!(
398 "point {:?} is within character in chunk {:?}",
399 point,
400 self.text,
401 );
402 }
403 }
404 }
405 offset
406 }
407
408 #[inline(always)]
409 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
410 let max_point = self.lines();
411 if point.0.row > max_point.row {
412 return max_point;
413 }
414
415 let row_offset_range = self.offset_range_for_row(point.0.row);
416 let line = self.slice(row_offset_range);
417 if point.0.column == 0 {
418 Point::new(point.0.row, 0)
419 } else if point.0.column >= line.len_utf16().0 as u32 {
420 Point::new(point.0.row, line.len() as u32)
421 } else {
422 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
423 while !line.text.is_char_boundary(column) {
424 column -= 1;
425 }
426 Point::new(point.0.row, column as u32)
427 }
428 }
429
430 #[inline(always)]
431 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
432 let max_point = self.lines();
433 if point.row > max_point.row {
434 return max_point;
435 }
436
437 let line = self.slice(self.offset_range_for_row(point.row));
438 if point.column == 0 {
439 point
440 } else if point.column >= line.len() as u32 {
441 Point::new(point.row, line.len() as u32)
442 } else {
443 let mut column = point.column as usize;
444 let bytes = line.text.as_bytes();
445 if bytes[column - 1] < 128 && bytes[column] < 128 {
446 return Point::new(point.row, column as u32);
447 }
448
449 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
450 loop {
451 if line.is_char_boundary(column)
452 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
453 {
454 break;
455 }
456
457 match bias {
458 Bias::Left => column -= 1,
459 Bias::Right => column += 1,
460 }
461 grapheme_cursor.set_cursor(column);
462 }
463 Point::new(point.row, column as u32)
464 }
465 }
466
467 #[inline(always)]
468 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
469 let max_point = self.lines();
470 if point.0.row > max_point.row {
471 PointUtf16::new(max_point.row, self.last_line_len_utf16())
472 } else {
473 let line = self.slice(self.offset_range_for_row(point.0.row));
474 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
475 PointUtf16::new(point.0.row, column.0 as u32)
476 }
477 }
478
479 #[inline(always)]
480 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
481 if target == OffsetUtf16::default() {
482 OffsetUtf16::default()
483 } else if target >= self.len_utf16() {
484 self.len_utf16()
485 } else {
486 let mut offset = self.offset_utf16_to_offset(target);
487 while !self.text.is_char_boundary(offset) {
488 if bias == Bias::Left {
489 offset -= 1;
490 } else {
491 offset += 1;
492 }
493 }
494 self.offset_to_offset_utf16(offset)
495 }
496 }
497
498 #[inline(always)]
499 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
500 let row_start = if row > 0 {
501 nth_set_bit(self.newlines, row as usize) + 1
502 } else {
503 0
504 };
505 let row_len = if row_start == MAX_BASE {
506 0
507 } else {
508 cmp::min(
509 (self.newlines >> row_start).trailing_zeros(),
510 (self.text.len() - row_start) as u32,
511 )
512 };
513 row_start..row_start + row_len as usize
514 }
515
516 #[inline(always)]
517 pub fn tabs(&self) -> Tabs {
518 Tabs {
519 tabs: self.tabs,
520 chars: self.chars,
521 }
522 }
523}
524
/// Iterator over the tab positions described by a pair of bitmaps.
pub struct Tabs {
    /// Bitmap of tabs that have not been yielded yet.
    tabs: u128,
    /// Bitmap of character start offsets.
    chars: u128,
}

/// Where a tab sits, counted both in bytes and in characters.
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Byte offset of the tab.
    pub byte_offset: usize,
    /// Number of characters that precede the tab.
    pub char_offset: usize,
}

impl Iterator for Tabs {
    type Item = TabPosition;

    /// Yields the remaining tab with the lowest byte offset, if any.
    fn next(&mut self) -> Option<Self::Item> {
        if self.tabs == 0 {
            return None;
        }

        // A tab occupies a single byte, so its bit index is its byte offset.
        let byte_offset = self.tabs.trailing_zeros() as usize;
        let preceding = (1u128 << byte_offset) - 1;
        let char_offset = (self.chars & preceding).count_ones() as usize;

        // Clear the lowest set bit, i.e. the tab that is being returned.
        self.tabs &= self.tabs - 1;

        Some(TabPosition {
            byte_offset,
            char_offset,
        })
    }
}
559
/// Returns the zero-based index of the `n`-th set bit of `v`, counting set
/// bits from the least significant end and starting at `n == 1`.
///
/// The result is only meaningful when `v` has at least `n` set bits and
/// `n >= 1`.
#[inline(always)]
fn nth_set_bit(v: u128, n: usize) -> usize {
    let lower = v as u64;
    let lower_ones = lower.count_ones() as usize;
    if n <= lower_ones {
        nth_set_bit_u64(lower, n as u64) as usize
    } else {
        // The bit lives in the upper half; skip the ones already accounted for.
        let upper = (v >> 64) as u64;
        64 + nth_set_bit_u64(upper, (n - lower_ones) as u64) as usize
    }
}

/// 64-bit building block of [`nth_set_bit`]: zero-based index of the `n`-th
/// (1-based) set bit of `v`, counted from the least significant bit.
///
/// Works without branches: partial population counts are computed in
/// parallel, then the search window `s` is halved six times.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // The selection below ranks bits from the most significant end, so mirror
    // the input first.
    let v = v.reverse_bits();
    let mut s: u64 = 64;

    // Population counts of 2-, 4-, 8- and 16-bit groups.
    let pairs = v - ((v >> 1) & (u64::MAX / 3));
    let nibbles = (pairs & (u64::MAX / 5)) + ((pairs >> 2) & (u64::MAX / 5));
    let bytes = (nibbles + (nibbles >> 4)) & (u64::MAX / 0x11);
    let halfwords = (bytes + (bytes >> 8)) & (u64::MAX / 0x101);

    // At every step `t` is the number of set bits in the upper half of the
    // current window. If `n` exceeds it, the subtraction wraps and sets bit 8,
    // which moves the window to the lower half and removes `t` from `n`.
    let t = (halfwords >> 32) + (halfwords >> 48);
    s -= (t.wrapping_sub(n) & 256) >> 3;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (halfwords >> (s - 16)) & 0xff;
    s -= (t.wrapping_sub(n) & 256) >> 4;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (bytes >> (s - 8)) & 0xf;
    s -= (t.wrapping_sub(n) & 256) >> 5;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (nibbles >> (s - 4)) & 0x7;
    s -= (t.wrapping_sub(n) & 256) >> 6;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (pairs >> (s - 2)) & 0x3;
    s -= (t.wrapping_sub(n) & 256) >> 7;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // `65 - s` is the 1-based position; subtract one more for a 0-based index.
    65 - s - 1
}
611
612#[cfg(test)]
613mod tests {
614 use super::*;
615 use rand::prelude::*;
616 use util::RandomCharIter;
617
618 #[gpui::test(iterations = 100)]
619 fn test_random_chunks(mut rng: StdRng) {
620 let chunk_len = rng.random_range(0..=MAX_BASE);
621 let text = RandomCharIter::new(&mut rng)
622 .take(chunk_len)
623 .collect::<String>();
624 let mut ix = chunk_len;
625 while !text.is_char_boundary(ix) {
626 ix -= 1;
627 }
628 let text = &text[..ix];
629
630 log::info!("Chunk: {:?}", text);
631 let chunk = Chunk::new(text);
632 verify_chunk(chunk.as_slice(), text);
633
634 for _ in 0..10 {
635 let mut start = rng.random_range(0..=chunk.text.len());
636 let mut end = rng.random_range(start..=chunk.text.len());
637 while !chunk.text.is_char_boundary(start) {
638 start -= 1;
639 }
640 while !chunk.text.is_char_boundary(end) {
641 end -= 1;
642 }
643 let range = start..end;
644 log::info!("Range: {:?}", range);
645 let text_slice = &text[range.clone()];
646 let chunk_slice = chunk.slice(range);
647 verify_chunk(chunk_slice, text_slice);
648 }
649 }
650
651 #[gpui::test(iterations = 1000)]
652 fn test_nth_set_bit_random(mut rng: StdRng) {
653 let set_count = rng.random_range(0..=128);
654 let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
655 set_bits.sort();
656 let mut n = 0;
657 for ix in set_bits.iter().copied() {
658 n |= 1 << ix;
659 }
660
661 for (mut ix, position) in set_bits.into_iter().enumerate() {
662 ix += 1;
663 assert_eq!(
664 nth_set_bit(n, ix),
665 position,
666 "nth_set_bit({:0128b}, {})",
667 n,
668 ix
669 );
670 }
671 }
672
673 fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
674 let mut offset = 0;
675 let mut offset_utf16 = OffsetUtf16(0);
676 let mut point = Point::zero();
677 let mut point_utf16 = PointUtf16::zero();
678
679 log::info!("Verifying chunk {:?}", text);
680 assert_eq!(chunk.offset_to_point(0), Point::zero());
681
682 let mut expected_tab_positions = Vec::new();
683
684 for (char_offset, c) in text.chars().enumerate() {
685 let expected_point = chunk.offset_to_point(offset);
686 assert_eq!(point, expected_point, "mismatch at offset {}", offset);
687 assert_eq!(
688 chunk.point_to_offset(point),
689 offset,
690 "mismatch at point {:?}",
691 point
692 );
693 assert_eq!(
694 chunk.offset_to_offset_utf16(offset),
695 offset_utf16,
696 "mismatch at offset {}",
697 offset
698 );
699 assert_eq!(
700 chunk.offset_utf16_to_offset(offset_utf16),
701 offset,
702 "mismatch at offset_utf16 {:?}",
703 offset_utf16
704 );
705 assert_eq!(
706 chunk.point_to_point_utf16(point),
707 point_utf16,
708 "mismatch at point {:?}",
709 point
710 );
711 assert_eq!(
712 chunk.point_utf16_to_offset(point_utf16, false),
713 offset,
714 "mismatch at point_utf16 {:?}",
715 point_utf16
716 );
717 assert_eq!(
718 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
719 point,
720 "mismatch for unclipped_point_utf16_to_point at {:?}",
721 point_utf16
722 );
723
724 assert_eq!(
725 chunk.clip_point(point, Bias::Left),
726 point,
727 "incorrect left clip at {:?}",
728 point
729 );
730 assert_eq!(
731 chunk.clip_point(point, Bias::Right),
732 point,
733 "incorrect right clip at {:?}",
734 point
735 );
736
737 for i in 1..c.len_utf8() {
738 let test_point = Point::new(point.row, point.column + i as u32);
739 assert_eq!(
740 chunk.clip_point(test_point, Bias::Left),
741 point,
742 "incorrect left clip within multi-byte char at {:?}",
743 test_point
744 );
745 assert_eq!(
746 chunk.clip_point(test_point, Bias::Right),
747 Point::new(point.row, point.column + c.len_utf8() as u32),
748 "incorrect right clip within multi-byte char at {:?}",
749 test_point
750 );
751 }
752
753 for i in 1..c.len_utf16() {
754 let test_point = Unclipped(PointUtf16::new(
755 point_utf16.row,
756 point_utf16.column + i as u32,
757 ));
758 assert_eq!(
759 chunk.unclipped_point_utf16_to_point(test_point),
760 point,
761 "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
762 test_point
763 );
764 assert_eq!(
765 chunk.clip_point_utf16(test_point, Bias::Left),
766 point_utf16,
767 "incorrect left clip_point_utf16 within multi-byte char at {:?}",
768 test_point
769 );
770 assert_eq!(
771 chunk.clip_point_utf16(test_point, Bias::Right),
772 PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
773 "incorrect right clip_point_utf16 within multi-byte char at {:?}",
774 test_point
775 );
776
777 let test_offset = OffsetUtf16(offset_utf16.0 + i);
778 assert_eq!(
779 chunk.clip_offset_utf16(test_offset, Bias::Left),
780 offset_utf16,
781 "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
782 test_offset
783 );
784 assert_eq!(
785 chunk.clip_offset_utf16(test_offset, Bias::Right),
786 OffsetUtf16(offset_utf16.0 + c.len_utf16()),
787 "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
788 test_offset
789 );
790 }
791
792 if c == '\n' {
793 point.row += 1;
794 point.column = 0;
795 point_utf16.row += 1;
796 point_utf16.column = 0;
797 } else {
798 point.column += c.len_utf8() as u32;
799 point_utf16.column += c.len_utf16() as u32;
800 }
801
802 if c == '\t' {
803 expected_tab_positions.push(TabPosition {
804 byte_offset: offset,
805 char_offset,
806 });
807 }
808
809 offset += c.len_utf8();
810 offset_utf16.0 += c.len_utf16();
811 }
812
813 let final_point = chunk.offset_to_point(offset);
814 assert_eq!(point, final_point, "mismatch at final offset {}", offset);
815 assert_eq!(
816 chunk.point_to_offset(point),
817 offset,
818 "mismatch at point {:?}",
819 point
820 );
821 assert_eq!(
822 chunk.offset_to_offset_utf16(offset),
823 offset_utf16,
824 "mismatch at offset {}",
825 offset
826 );
827 assert_eq!(
828 chunk.offset_utf16_to_offset(offset_utf16),
829 offset,
830 "mismatch at offset_utf16 {:?}",
831 offset_utf16
832 );
833 assert_eq!(
834 chunk.point_to_point_utf16(point),
835 point_utf16,
836 "mismatch at final point {:?}",
837 point
838 );
839 assert_eq!(
840 chunk.point_utf16_to_offset(point_utf16, false),
841 offset,
842 "mismatch at final point_utf16 {:?}",
843 point_utf16
844 );
845 assert_eq!(
846 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
847 point,
848 "mismatch for unclipped_point_utf16_to_point at final point {:?}",
849 point_utf16
850 );
851 assert_eq!(
852 chunk.clip_point(point, Bias::Left),
853 point,
854 "incorrect left clip at final point {:?}",
855 point
856 );
857 assert_eq!(
858 chunk.clip_point(point, Bias::Right),
859 point,
860 "incorrect right clip at final point {:?}",
861 point
862 );
863 assert_eq!(
864 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
865 point_utf16,
866 "incorrect left clip_point_utf16 at final point {:?}",
867 point_utf16
868 );
869 assert_eq!(
870 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
871 point_utf16,
872 "incorrect right clip_point_utf16 at final point {:?}",
873 point_utf16
874 );
875 assert_eq!(
876 chunk.clip_offset_utf16(offset_utf16, Bias::Left),
877 offset_utf16,
878 "incorrect left clip_offset_utf16 at final offset {:?}",
879 offset_utf16
880 );
881 assert_eq!(
882 chunk.clip_offset_utf16(offset_utf16, Bias::Right),
883 offset_utf16,
884 "incorrect right clip_offset_utf16 at final offset {:?}",
885 offset_utf16
886 );
887
888 // Verify length methods
889 assert_eq!(chunk.len(), text.len());
890 assert_eq!(
891 chunk.len_utf16().0,
892 text.chars().map(|c| c.len_utf16()).sum::<usize>()
893 );
894
895 // Verify line counting
896 let lines = chunk.lines();
897 let mut newline_count = 0;
898 let mut last_line_len = 0;
899 for c in text.chars() {
900 if c == '\n' {
901 newline_count += 1;
902 last_line_len = 0;
903 } else {
904 last_line_len += c.len_utf8() as u32;
905 }
906 }
907 assert_eq!(lines, Point::new(newline_count, last_line_len));
908
909 // Verify first/last line chars
910 if !text.is_empty() {
911 let first_line = text.split('\n').next().unwrap();
912 assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
913
914 let last_line = text.split('\n').next_back().unwrap();
915 assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
916 assert_eq!(
917 chunk.last_line_len_utf16(),
918 last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
919 );
920 }
921
922 // Verify longest row
923 let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
924 let mut max_chars = 0;
925 let mut current_row = 0;
926 let mut current_chars = 0;
927 let mut max_row = 0;
928
929 for c in text.chars() {
930 if c == '\n' {
931 if current_chars > max_chars {
932 max_chars = current_chars;
933 max_row = current_row;
934 }
935 current_row += 1;
936 current_chars = 0;
937 } else {
938 current_chars += 1;
939 }
940 }
941
942 if current_chars > max_chars {
943 max_chars = current_chars;
944 max_row = current_row;
945 }
946
947 assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
948 assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
949 }
950}