1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
/// Base size from which [`MAX_BASE`] is derived: 6 when compiled for tests, 64 otherwise.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Capacity in bytes of a [`Chunk`]'s text buffer; always twice [`MIN_BASE`].
// Outside of tests this is 128, which is exactly the bit width of the `u128` bitmaps
// stored alongside the text.
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
10
/// A fixed-capacity piece of UTF-8 text together with bitmaps that index it.
///
/// Bit `i` of every bitmap describes byte `i` of `text`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Set at each byte index where a character starts.
    chars: u128,
    /// Set at the start byte of each character and, for characters that need two UTF-16 code
    /// units, at the following byte too; the number of set bits is the UTF-16 length.
    chars_utf16: u128,
    /// Set at each byte index that holds `'\n'`.
    newlines: u128,
    /// Set at each byte index that holds `'\t'`.
    tabs: u128,
    /// The text itself, limited to `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
19
20impl Chunk {
21 #[inline(always)]
22 pub fn new(text: &str) -> Self {
23 let mut this = Chunk::default();
24 this.push_str(text);
25 this
26 }
27
28 #[inline(always)]
29 pub fn push_str(&mut self, text: &str) {
30 for (char_ix, c) in text.char_indices() {
31 let ix = self.text.len() + char_ix;
32 self.chars |= 1 << ix;
33 self.chars_utf16 |= 1 << ix;
34 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
35 self.newlines |= ((c == '\n') as u128) << ix;
36 self.tabs |= ((c == '\t') as u128) << ix;
37 }
38 self.text.push_str(text);
39 }
40
41 #[inline(always)]
42 pub fn append(&mut self, slice: ChunkSlice) {
43 if slice.is_empty() {
44 return;
45 };
46
47 let base_ix = self.text.len();
48 self.chars |= slice.chars << base_ix;
49 self.chars_utf16 |= slice.chars_utf16 << base_ix;
50 self.newlines |= slice.newlines << base_ix;
51 self.tabs |= slice.tabs << base_ix;
52 self.text.push_str(slice.text);
53 }
54
55 #[inline(always)]
56 pub fn as_slice(&self) -> ChunkSlice<'_> {
57 ChunkSlice {
58 chars: self.chars,
59 chars_utf16: self.chars_utf16,
60 newlines: self.newlines,
61 tabs: self.tabs,
62 text: &self.text,
63 }
64 }
65
66 #[inline(always)]
67 pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
68 self.as_slice().slice(range)
69 }
70}
71
/// A borrowed view of a piece of chunk text together with its bitmaps.
///
/// Bit `i` of every bitmap describes byte `i` of `text`, i.e. the bitmaps are relative to the
/// start of the slice rather than to the chunk it was taken from.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Set at each byte index where a character starts.
    chars: u128,
    /// One set bit per UTF-16 code unit (see `len_utf16`, which counts the set bits).
    chars_utf16: u128,
    /// Set at each byte index that holds a newline.
    newlines: u128,
    /// Set at each byte index that holds a tab.
    tabs: u128,
    /// The borrowed text.
    text: &'a str,
}
80
81impl Into<Chunk> for ChunkSlice<'_> {
82 fn into(self) -> Chunk {
83 Chunk {
84 chars: self.chars,
85 chars_utf16: self.chars_utf16,
86 newlines: self.newlines,
87 tabs: self.tabs,
88 text: self.text.try_into().unwrap(),
89 }
90 }
91}
92
93impl<'a> ChunkSlice<'a> {
    /// Returns `true` if the slice's text is empty.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.text.is_empty()
    }
98
    /// Returns whether byte `offset` lies on a UTF-8 character boundary of the slice's text.
    ///
    /// Delegates to [`str::is_char_boundary`].
    #[inline(always)]
    pub fn is_char_boundary(self, offset: usize) -> bool {
        self.text.is_char_boundary(offset)
    }
103
104 #[inline(always)]
105 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
106 if mid == MAX_BASE {
107 let left = self;
108 let right = ChunkSlice {
109 chars: 0,
110 chars_utf16: 0,
111 newlines: 0,
112 tabs: 0,
113 text: "",
114 };
115 (left, right)
116 } else {
117 let mask = (1u128 << mid) - 1;
118 let (left_text, right_text) = self.text.split_at(mid);
119 let left = ChunkSlice {
120 chars: self.chars & mask,
121 chars_utf16: self.chars_utf16 & mask,
122 newlines: self.newlines & mask,
123 tabs: self.tabs & mask,
124 text: left_text,
125 };
126 let right = ChunkSlice {
127 chars: self.chars >> mid,
128 chars_utf16: self.chars_utf16 >> mid,
129 newlines: self.newlines >> mid,
130 tabs: self.tabs >> mid,
131 text: right_text,
132 };
133 (left, right)
134 }
135 }
136
137 #[inline(always)]
138 pub fn slice(self, range: Range<usize>) -> Self {
139 let mask = if range.end == MAX_BASE {
140 u128::MAX
141 } else {
142 (1u128 << range.end) - 1
143 };
144 if range.start == MAX_BASE {
145 Self {
146 chars: 0,
147 chars_utf16: 0,
148 newlines: 0,
149 tabs: 0,
150 text: "",
151 }
152 } else {
153 Self {
154 chars: (self.chars & mask) >> range.start,
155 chars_utf16: (self.chars_utf16 & mask) >> range.start,
156 newlines: (self.newlines & mask) >> range.start,
157 tabs: (self.tabs & mask) >> range.start,
158 text: &self.text[range],
159 }
160 }
161 }
162
    /// Collects the slice's metrics into a [`TextSummary`].
    ///
    /// Each field is taken from the accessor of the same name on this slice; `chars` is the
    /// total character count that `longest_row` computes as a by-product.
    #[inline(always)]
    pub fn text_summary(&self) -> TextSummary {
        // `longest_row` writes the total number of characters into `chars`.
        let mut chars = 0;
        let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
        TextSummary {
            len: self.len(),
            chars,
            len_utf16: self.len_utf16(),
            lines: self.lines(),
            first_line_chars: self.first_line_chars(),
            last_line_chars: self.last_line_chars(),
            last_line_len_utf16: self.last_line_len_utf16(),
            longest_row,
            longest_row_chars,
        }
    }
179
    /// Returns the length of the slice's text in bytes.
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.text.len()
    }
185
    /// Returns the length of the slice's text in UTF-16 code units.
    ///
    /// This is the number of set bits in the `chars_utf16` bitmap.
    #[inline(always)]
    pub fn len_utf16(&self) -> OffsetUtf16 {
        OffsetUtf16(self.chars_utf16.count_ones() as usize)
    }
191
192 /// Get point representing number of lines and length of last line
193 #[inline(always)]
194 pub fn lines(&self) -> Point {
195 let row = self.newlines.count_ones();
196 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
197 Point::new(row, column)
198 }
199
200 /// Get number of chars in first line
201 #[inline(always)]
202 pub fn first_line_chars(&self) -> u32 {
203 if self.newlines == 0 {
204 self.chars.count_ones()
205 } else {
206 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
207 (self.chars & mask).count_ones()
208 }
209 }
210
211 /// Get number of chars in last line
212 #[inline(always)]
213 pub fn last_line_chars(&self) -> u32 {
214 if self.newlines == 0 {
215 self.chars.count_ones()
216 } else {
217 let mask = !(u128::MAX >> self.newlines.leading_zeros());
218 (self.chars & mask).count_ones()
219 }
220 }
221
222 /// Get number of UTF-16 code units in last line
223 #[inline(always)]
224 pub fn last_line_len_utf16(&self) -> u32 {
225 if self.newlines == 0 {
226 self.chars_utf16.count_ones()
227 } else {
228 let mask = !(u128::MAX >> self.newlines.leading_zeros());
229 (self.chars_utf16 & mask).count_ones()
230 }
231 }
232
233 /// Get the longest row in the chunk and its length in characters.
234 /// Calculate the total number of characters in the chunk along the way.
235 #[inline(always)]
236 pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
237 let mut chars = self.chars;
238 let mut newlines = self.newlines;
239 *total_chars = 0;
240 let mut row = 0;
241 let mut longest_row = 0;
242 let mut longest_row_chars = 0;
243 while newlines > 0 {
244 let newline_ix = newlines.trailing_zeros();
245 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
246 *total_chars += usize::from(row_chars);
247 if row_chars > longest_row_chars {
248 longest_row = row;
249 longest_row_chars = row_chars;
250 }
251
252 newlines >>= newline_ix;
253 newlines >>= 1;
254 chars >>= newline_ix;
255 chars >>= 1;
256 row += 1;
257 *total_chars += 1;
258 }
259
260 let row_chars = chars.count_ones() as u8;
261 *total_chars += usize::from(row_chars);
262 if row_chars > longest_row_chars {
263 (row, row_chars as u32)
264 } else {
265 (longest_row, longest_row_chars as u32)
266 }
267 }
268
269 #[inline(always)]
270 pub fn offset_to_point(&self, offset: usize) -> Point {
271 let mask = if offset == MAX_BASE {
272 u128::MAX
273 } else {
274 (1u128 << offset) - 1
275 };
276 let row = (self.newlines & mask).count_ones();
277 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
278 let column = (offset - newline_ix as usize) as u32;
279 Point::new(row, column)
280 }
281
282 #[inline(always)]
283 pub fn point_to_offset(&self, point: Point) -> usize {
284 if point.row > self.lines().row {
285 debug_panic!(
286 "point {:?} extends beyond rows for string {:?}",
287 point,
288 self.text
289 );
290 return self.len();
291 }
292
293 let row_offset_range = self.offset_range_for_row(point.row);
294 if point.column > row_offset_range.len() as u32 {
295 debug_panic!(
296 "point {:?} extends beyond row for string {:?}",
297 point,
298 self.text
299 );
300 row_offset_range.end
301 } else {
302 row_offset_range.start + point.column as usize
303 }
304 }
305
306 #[inline(always)]
307 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
308 let mask = if offset == MAX_BASE {
309 u128::MAX
310 } else {
311 (1u128 << offset) - 1
312 };
313 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
314 }
315
316 #[inline(always)]
317 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
318 if target.0 == 0 {
319 0
320 } else {
321 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
322 if ix == MAX_BASE {
323 MAX_BASE
324 } else {
325 let utf8_additional_len = cmp::min(
326 (self.chars_utf16 >> ix).trailing_zeros() as usize,
327 self.text.len() - ix,
328 );
329 ix + utf8_additional_len
330 }
331 }
332 }
333
334 #[inline(always)]
335 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
336 let mask = if offset == MAX_BASE {
337 u128::MAX
338 } else {
339 (1u128 << offset) - 1
340 };
341 let row = (self.newlines & mask).count_ones();
342 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
343 let column = if newline_ix as usize == MAX_BASE {
344 0
345 } else {
346 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
347 };
348 PointUtf16::new(row, column)
349 }
350
    /// Converts a point (row, byte column) into a UTF-16 point.
    ///
    /// Implemented by going through the byte offset: `point_to_offset` followed by
    /// `offset_to_point_utf16`.
    #[inline(always)]
    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
        self.offset_to_point_utf16(self.point_to_offset(point))
    }
355
356 #[inline(always)]
357 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
358 let lines = self.lines();
359 if point.row > lines.row {
360 if !clip {
361 debug_panic!(
362 "point {:?} is beyond this chunk's extent {:?}",
363 point,
364 self.text
365 );
366 }
367 return self.len();
368 }
369
370 let row_offset_range = self.offset_range_for_row(point.row);
371 let line = self.slice(row_offset_range.clone());
372 if point.column > line.last_line_len_utf16() {
373 if !clip {
374 debug_panic!(
375 "point {:?} is beyond the end of the line in chunk {:?}",
376 point,
377 self.text
378 );
379 }
380 return line.len();
381 }
382
383 let mut offset = row_offset_range.start;
384 if point.column > 0 {
385 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
386 if !self.text.is_char_boundary(offset) {
387 offset -= 1;
388 while !self.text.is_char_boundary(offset) {
389 offset -= 1;
390 }
391 if !clip {
392 debug_panic!(
393 "point {:?} is within character in chunk {:?}",
394 point,
395 self.text,
396 );
397 }
398 }
399 }
400 offset
401 }
402
403 #[inline(always)]
404 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
405 let max_point = self.lines();
406 if point.0.row > max_point.row {
407 return max_point;
408 }
409
410 let row_offset_range = self.offset_range_for_row(point.0.row);
411 let line = self.slice(row_offset_range);
412 if point.0.column == 0 {
413 Point::new(point.0.row, 0)
414 } else if point.0.column >= line.len_utf16().0 as u32 {
415 Point::new(point.0.row, line.len() as u32)
416 } else {
417 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
418 while !line.text.is_char_boundary(column) {
419 column -= 1;
420 }
421 Point::new(point.0.row, column as u32)
422 }
423 }
424
    /// Clips `point` to a valid position in the slice.
    ///
    /// A row past the last row yields the end of the slice and a column at or past the end of
    /// its row yields the end of that row. Otherwise the column is moved in the direction of
    /// `bias` until it lies on both a UTF-8 character boundary and a grapheme boundary.
    #[inline(always)]
    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
        let max_point = self.lines();
        if point.row > max_point.row {
            return max_point;
        }

        let line = self.slice(self.offset_range_for_row(point.row));
        if point.column == 0 {
            point
        } else if point.column >= line.len() as u32 {
            Point::new(point.row, line.len() as u32)
        } else {
            let mut column = point.column as usize;
            let bytes = line.text.as_bytes();
            // Fast path: the bytes on both sides of the column are ASCII, in which case the
            // column is returned without consulting the grapheme cursor.
            if bytes[column - 1] < 128 && bytes[column] < 128 {
                return Point::new(point.row, column as u32);
            }

            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
            loop {
                // A failed boundary query is treated as "not a boundary".
                if line.is_char_boundary(column)
                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
                {
                    break;
                }

                match bias {
                    Bias::Left => column -= 1,
                    Bias::Right => column += 1,
                }
                grapheme_cursor.set_cursor(column);
            }
            Point::new(point.row, column as u32)
        }
    }
461
462 #[inline(always)]
463 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
464 let max_point = self.lines();
465 if point.0.row > max_point.row {
466 PointUtf16::new(max_point.row, self.last_line_len_utf16())
467 } else {
468 let line = self.slice(self.offset_range_for_row(point.0.row));
469 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
470 PointUtf16::new(point.0.row, column.0 as u32)
471 }
472 }
473
474 #[inline(always)]
475 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
476 if target == OffsetUtf16::default() {
477 OffsetUtf16::default()
478 } else if target >= self.len_utf16() {
479 self.len_utf16()
480 } else {
481 let mut offset = self.offset_utf16_to_offset(target);
482 while !self.text.is_char_boundary(offset) {
483 if bias == Bias::Left {
484 offset -= 1;
485 } else {
486 offset += 1;
487 }
488 }
489 self.offset_to_offset_utf16(offset)
490 }
491 }
492
493 #[inline(always)]
494 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
495 let row_start = if row > 0 {
496 nth_set_bit(self.newlines, row as usize) + 1
497 } else {
498 0
499 };
500 let row_len = if row_start == MAX_BASE {
501 0
502 } else {
503 cmp::min(
504 (self.newlines >> row_start).trailing_zeros(),
505 (self.text.len() - row_start) as u32,
506 )
507 };
508 row_start..row_start + row_len as usize
509 }
510
    /// Returns an iterator over the positions of the tabs in the slice.
    ///
    /// The iterator gets copies of the `tabs` and `chars` bitmaps, so it does not borrow the
    /// slice.
    #[inline(always)]
    pub fn tabs(&self) -> Tabs {
        Tabs {
            tabs: self.tabs,
            chars: self.chars,
        }
    }
518}
519
/// Iterator over the tab positions described by a pair of bitmaps.
///
/// Yields a [`TabPosition`] for each set bit of `tabs`, lowest bit first.
pub struct Tabs {
    /// Bits of the tabs that have not been yielded yet.
    tabs: u128,
    /// Character-start bitmap, used to turn a byte offset into a character offset.
    chars: u128,
}
524
/// Location of a single tab as produced by the [`Tabs`] iterator.
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Bit index of the tab in the `tabs` bitmap, which is its byte offset.
    pub byte_offset: usize,
    /// Number of character starts recorded below `byte_offset` in the `chars` bitmap.
    pub char_offset: usize,
}
530
531impl Iterator for Tabs {
532 type Item = TabPosition;
533
534 fn next(&mut self) -> Option<Self::Item> {
535 if self.tabs == 0 {
536 return None;
537 }
538
539 let tab_offset = self.tabs.trailing_zeros() as usize;
540 let chars_mask = (1 << tab_offset) - 1;
541 let char_offset = (self.chars & chars_mask).count_ones() as usize;
542
543 // Since tabs are 1 byte the tab offset is the same as the byte offset
544 let position = TabPosition {
545 byte_offset: tab_offset,
546 char_offset,
547 };
548 // Remove the tab we've just seen
549 self.tabs ^= 1 << tab_offset;
550
551 Some(position)
552 }
553}
554
555/// Finds the n-th bit that is set to 1.
556#[inline(always)]
557fn nth_set_bit(v: u128, n: usize) -> usize {
558 let low = v as u64;
559 let high = (v >> 64) as u64;
560
561 let low_count = low.count_ones() as usize;
562 if n > low_count {
563 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
564 } else {
565 nth_set_bit_u64(low, n as u64) as usize
566 }
567}
568
/// Returns the zero-based position (from the least significant bit) of the `n`-th set bit of
/// `v`, with `n` counting from one.
///
/// This is a branchless search: partial population counts are computed for groups of 2, 4, 8
/// and 16 bits, and each step halves the candidate window.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // The search below ranks bits from the most significant end, so work on the mirrored
    // word and convert the position back at the end.
    let reversed = v.reverse_bits();
    let mut pos: u64 = 64;

    // Population counts of every 2-, 4-, 8- and 16-bit group
    let ones_per_2 = reversed - ((reversed >> 1) & (u64::MAX / 3));
    let ones_per_4 = (ones_per_2 & (u64::MAX / 5)) + ((ones_per_2 >> 2) & (u64::MAX / 5));
    let ones_per_8 = (ones_per_4 + (ones_per_4 >> 4)) & (u64::MAX / 0x11);
    let ones_per_16 = (ones_per_8 + (ones_per_8 >> 8)) & (u64::MAX / 0x101);

    // In each step, bit 8 of `count - n` is set exactly when the upper part of the window
    // holds fewer than `n` ones. In that case the window moves to the lower part and the
    // ones that were skipped are subtracted from `n`.
    let count = (ones_per_16 >> 32) + (ones_per_16 >> 48);
    pos -= (count.wrapping_sub(n) & 256) >> 3;
    n -= count & (count.wrapping_sub(n) >> 8);

    let count = (ones_per_16 >> (pos - 16)) & 0xff;
    pos -= (count.wrapping_sub(n) & 256) >> 4;
    n -= count & (count.wrapping_sub(n) >> 8);

    let count = (ones_per_8 >> (pos - 8)) & 0xf;
    pos -= (count.wrapping_sub(n) & 256) >> 5;
    n -= count & (count.wrapping_sub(n) >> 8);

    let count = (ones_per_4 >> (pos - 4)) & 0x7;
    pos -= (count.wrapping_sub(n) & 256) >> 6;
    n -= count & (count.wrapping_sub(n) >> 8);

    let count = (ones_per_2 >> (pos - 2)) & 0x3;
    pos -= (count.wrapping_sub(n) & 256) >> 7;
    n -= count & (count.wrapping_sub(n) >> 8);

    let count = (reversed >> (pos - 1)) & 0x1;
    pos -= (count.wrapping_sub(n) & 256) >> 8;

    64 - pos
}
606
607#[cfg(test)]
608mod tests {
609 use super::*;
610 use rand::prelude::*;
611 use util::RandomCharIter;
612
    /// Builds a chunk from random text and checks it, and ten random sub-slices of it,
    /// against the plain string with `verify_chunk`.
    #[gpui::test(iterations = 100)]
    fn test_random_chunks(mut rng: StdRng) {
        let chunk_len = rng.random_range(0..=MAX_BASE);
        let text = RandomCharIter::new(&mut rng)
            .take(chunk_len)
            .collect::<String>();
        // `chunk_len` characters can take more than `chunk_len` bytes, so cut the text back
        // to at most `chunk_len` bytes on a character boundary.
        let mut ix = chunk_len;
        while !text.is_char_boundary(ix) {
            ix -= 1;
        }
        let text = &text[..ix];

        log::info!("Chunk: {:?}", text);
        let chunk = Chunk::new(text);
        verify_chunk(chunk.as_slice(), text);

        for _ in 0..10 {
            // Pick a random range and move both ends back onto character boundaries.
            let mut start = rng.random_range(0..=chunk.text.len());
            let mut end = rng.random_range(start..=chunk.text.len());
            while !chunk.text.is_char_boundary(start) {
                start -= 1;
            }
            while !chunk.text.is_char_boundary(end) {
                end -= 1;
            }
            let range = start..end;
            log::info!("Range: {:?}", range);
            let text_slice = &text[range.clone()];
            let chunk_slice = chunk.slice(range);
            verify_chunk(chunk_slice, text_slice);
        }
    }
645
    /// Sets a random selection of bits in a 128-bit value and checks that `nth_set_bit`
    /// returns the position of each of them when asked for its one-based rank.
    #[gpui::test(iterations = 1000)]
    fn test_nth_set_bit_random(mut rng: StdRng) {
        let set_count = rng.random_range(0..=128);
        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
        // Sorted ascending, so that the index in the list is the rank of the bit.
        set_bits.sort();
        let mut n = 0;
        for ix in set_bits.iter().copied() {
            n |= 1 << ix;
        }

        for (mut ix, position) in set_bits.into_iter().enumerate() {
            // `nth_set_bit` counts from one.
            ix += 1;
            assert_eq!(
                nth_set_bit(n, ix),
                position,
                "nth_set_bit({:0128b}, {})",
                n,
                ix
            );
        }
    }
667
    /// Checks every accessor and conversion of `chunk` against values computed directly from
    /// `text`, which must be the same text the chunk slice was built from.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Expected coordinates of the current character, maintained while walking `text`.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Conversions between all coordinate spaces at the start of this character.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A position at the start of a character must be left untouched by clipping.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Byte positions inside a multi-byte character clip to its start or its end.
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // UTF-16 positions inside a surrogate pair clip to its start or its end.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the expected coordinates past this character.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same conversions once more for the position at the very end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').next_back().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        // Reference computation: the first row with the highest character count wins.
        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // The last row has no terminating newline, so it is compared after the loop.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
945}