1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
/// Base size constant: 6 in test builds, 64 otherwise.
///
/// Only used in this file to derive [`MAX_BASE`]; presumably the lower fill
/// bound enforced by the rope that owns the chunks (confirm against callers).
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };

/// Capacity, in bytes, of a chunk's text buffer: twice [`MIN_BASE`].
///
/// Outside of tests this is 128, which matches the width of the `u128`
/// bitmaps that index the text one bit per byte.
pub(crate) const MAX_BASE: usize = 2 * MIN_BASE;
10
/// An owned, fixed-capacity piece of text together with bitmaps that index it.
///
/// Bit `i` of every bitmap describes byte offset `i` of `text`; the bits are
/// populated by [`Chunk::push_str`] and [`Chunk::append`].
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    /// Bit `i` is set when a character starts at byte offset `i`.
    chars: u128,
    /// One set bit per UTF-16 code unit: a character starting at byte `i`
    /// sets bit `i`, plus bit `i + 1` when it encodes as two code units.
    chars_utf16: u128,
    /// Bit `i` is set when the character starting at byte `i` is `'\n'`.
    newlines: u128,
    /// Bit `i` is set when the character starting at byte `i` is `'\t'`.
    tabs: u128,
    /// The text itself, stored inline with a capacity of `MAX_BASE` bytes.
    pub text: ArrayString<MAX_BASE>,
}
19
20impl Chunk {
21 #[inline(always)]
22 pub fn new(text: &str) -> Self {
23 let mut this = Chunk::default();
24 this.push_str(text);
25 this
26 }
27
28 #[inline(always)]
29 pub fn push_str(&mut self, text: &str) {
30 for (char_ix, c) in text.char_indices() {
31 let ix = self.text.len() + char_ix;
32 self.chars |= 1 << ix;
33 self.chars_utf16 |= 1 << ix;
34 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
35 self.newlines |= ((c == '\n') as u128) << ix;
36 self.tabs |= ((c == '\t') as u128) << ix;
37 }
38 self.text.push_str(text);
39 }
40
41 #[inline(always)]
42 pub fn append(&mut self, slice: ChunkSlice) {
43 if slice.is_empty() {
44 return;
45 };
46
47 let base_ix = self.text.len();
48 self.chars |= slice.chars << base_ix;
49 self.chars_utf16 |= slice.chars_utf16 << base_ix;
50 self.newlines |= slice.newlines << base_ix;
51 self.tabs |= slice.tabs << base_ix;
52 self.text.push_str(&slice.text);
53 }
54
55 #[inline(always)]
56 pub fn as_slice(&self) -> ChunkSlice {
57 ChunkSlice {
58 chars: self.chars,
59 chars_utf16: self.chars_utf16,
60 newlines: self.newlines,
61 tabs: self.tabs,
62 text: &self.text,
63 }
64 }
65
66 #[inline(always)]
67 pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
68 self.as_slice().slice(range)
69 }
70}
71
/// A borrowed view of (part of) a chunk: a string slice plus bitmaps whose
/// bit `i` describes byte offset `i` of `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Bit `i` is set when a character starts at byte offset `i`.
    chars: u128,
    /// One set bit per UTF-16 code unit of the text.
    chars_utf16: u128,
    /// Bit `i` is set when byte `i` is a newline.
    newlines: u128,
    /// Bit `i` is set when byte `i` is a tab.
    tabs: u128,
    /// The text this slice covers.
    text: &'a str,
}
80
81impl<'a> Into<Chunk> for ChunkSlice<'a> {
82 fn into(self) -> Chunk {
83 Chunk {
84 chars: self.chars,
85 chars_utf16: self.chars_utf16,
86 newlines: self.newlines,
87 tabs: self.tabs,
88 text: self.text.try_into().unwrap(),
89 }
90 }
91}
92
93impl<'a> ChunkSlice<'a> {
94 #[inline(always)]
95 pub fn is_empty(self) -> bool {
96 self.text.is_empty()
97 }
98
99 #[inline(always)]
100 pub fn is_char_boundary(self, offset: usize) -> bool {
101 self.text.is_char_boundary(offset)
102 }
103
104 #[inline(always)]
105 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
106 if mid == MAX_BASE {
107 let left = self;
108 let right = ChunkSlice {
109 chars: 0,
110 chars_utf16: 0,
111 newlines: 0,
112 tabs: 0,
113 text: "",
114 };
115 (left, right)
116 } else {
117 let mask = (1u128 << mid) - 1;
118 let (left_text, right_text) = self.text.split_at(mid);
119 let left = ChunkSlice {
120 chars: self.chars & mask,
121 chars_utf16: self.chars_utf16 & mask,
122 newlines: self.newlines & mask,
123 tabs: self.tabs & mask,
124 text: left_text,
125 };
126 let right = ChunkSlice {
127 chars: self.chars >> mid,
128 chars_utf16: self.chars_utf16 >> mid,
129 newlines: self.newlines >> mid,
130 tabs: self.tabs >> mid,
131 text: right_text,
132 };
133 (left, right)
134 }
135 }
136
137 #[inline(always)]
138 pub fn slice(self, range: Range<usize>) -> Self {
139 let mask = if range.end == MAX_BASE {
140 u128::MAX
141 } else {
142 (1u128 << range.end) - 1
143 };
144 if range.start == MAX_BASE {
145 Self {
146 chars: 0,
147 chars_utf16: 0,
148 newlines: 0,
149 tabs: 0,
150 text: "",
151 }
152 } else {
153 Self {
154 chars: (self.chars & mask) >> range.start,
155 chars_utf16: (self.chars_utf16 & mask) >> range.start,
156 newlines: (self.newlines & mask) >> range.start,
157 tabs: (self.tabs & mask) >> range.start,
158 text: &self.text[range],
159 }
160 }
161 }
162
163 #[inline(always)]
164 pub fn text_summary(&self) -> TextSummary {
165 let (longest_row, longest_row_chars) = self.longest_row();
166 TextSummary {
167 len: self.len(),
168 len_utf16: self.len_utf16(),
169 lines: self.lines(),
170 first_line_chars: self.first_line_chars(),
171 last_line_chars: self.last_line_chars(),
172 last_line_len_utf16: self.last_line_len_utf16(),
173 longest_row,
174 longest_row_chars,
175 }
176 }
177
178 /// Get length in bytes
179 #[inline(always)]
180 pub fn len(&self) -> usize {
181 self.text.len()
182 }
183
184 /// Get length in UTF-16 code units
185 #[inline(always)]
186 pub fn len_utf16(&self) -> OffsetUtf16 {
187 OffsetUtf16(self.chars_utf16.count_ones() as usize)
188 }
189
190 /// Get point representing number of lines and length of last line
191 #[inline(always)]
192 pub fn lines(&self) -> Point {
193 let row = self.newlines.count_ones();
194 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
195 Point::new(row, column)
196 }
197
198 /// Get number of chars in first line
199 #[inline(always)]
200 pub fn first_line_chars(&self) -> u32 {
201 if self.newlines == 0 {
202 self.chars.count_ones()
203 } else {
204 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
205 (self.chars & mask).count_ones()
206 }
207 }
208
209 /// Get number of chars in last line
210 #[inline(always)]
211 pub fn last_line_chars(&self) -> u32 {
212 if self.newlines == 0 {
213 self.chars.count_ones()
214 } else {
215 let mask = !(u128::MAX >> self.newlines.leading_zeros());
216 (self.chars & mask).count_ones()
217 }
218 }
219
220 /// Get number of UTF-16 code units in last line
221 #[inline(always)]
222 pub fn last_line_len_utf16(&self) -> u32 {
223 if self.newlines == 0 {
224 self.chars_utf16.count_ones()
225 } else {
226 let mask = !(u128::MAX >> self.newlines.leading_zeros());
227 (self.chars_utf16 & mask).count_ones()
228 }
229 }
230
231 /// Get the longest row in the chunk and its length in characters.
232 #[inline(always)]
233 pub fn longest_row(&self) -> (u32, u32) {
234 let mut chars = self.chars;
235 let mut newlines = self.newlines;
236 let mut row = 0;
237 let mut longest_row = 0;
238 let mut longest_row_chars = 0;
239 while newlines > 0 {
240 let newline_ix = newlines.trailing_zeros();
241 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
242 if row_chars > longest_row_chars {
243 longest_row = row;
244 longest_row_chars = row_chars;
245 }
246
247 newlines >>= newline_ix;
248 newlines >>= 1;
249 chars >>= newline_ix;
250 chars >>= 1;
251 row += 1;
252 }
253
254 let row_chars = chars.count_ones() as u8;
255 if row_chars > longest_row_chars {
256 (row, row_chars as u32)
257 } else {
258 (longest_row, longest_row_chars as u32)
259 }
260 }
261
262 #[inline(always)]
263 pub fn offset_to_point(&self, offset: usize) -> Point {
264 let mask = if offset == MAX_BASE {
265 u128::MAX
266 } else {
267 (1u128 << offset) - 1
268 };
269 let row = (self.newlines & mask).count_ones();
270 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
271 let column = (offset - newline_ix as usize) as u32;
272 Point::new(row, column)
273 }
274
275 #[inline(always)]
276 pub fn point_to_offset(&self, point: Point) -> usize {
277 if point.row > self.lines().row {
278 debug_panic!(
279 "point {:?} extends beyond rows for string {:?}",
280 point,
281 self.text
282 );
283 return self.len();
284 }
285
286 let row_offset_range = self.offset_range_for_row(point.row);
287 if point.column > row_offset_range.len() as u32 {
288 debug_panic!(
289 "point {:?} extends beyond row for string {:?}",
290 point,
291 self.text
292 );
293 row_offset_range.end
294 } else {
295 row_offset_range.start + point.column as usize
296 }
297 }
298
299 #[inline(always)]
300 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
301 let mask = if offset == MAX_BASE {
302 u128::MAX
303 } else {
304 (1u128 << offset) - 1
305 };
306 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
307 }
308
309 #[inline(always)]
310 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
311 if target.0 == 0 {
312 0
313 } else {
314 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
315 if ix == MAX_BASE {
316 MAX_BASE
317 } else {
318 let utf8_additional_len = cmp::min(
319 (self.chars_utf16 >> ix).trailing_zeros() as usize,
320 self.text.len() - ix,
321 );
322 ix + utf8_additional_len
323 }
324 }
325 }
326
327 #[inline(always)]
328 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
329 let mask = if offset == MAX_BASE {
330 u128::MAX
331 } else {
332 (1u128 << offset) - 1
333 };
334 let row = (self.newlines & mask).count_ones();
335 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
336 let column = if newline_ix as usize == MAX_BASE {
337 0
338 } else {
339 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
340 };
341 PointUtf16::new(row, column)
342 }
343
344 #[inline(always)]
345 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
346 self.offset_to_point_utf16(self.point_to_offset(point))
347 }
348
349 #[inline(always)]
350 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
351 let lines = self.lines();
352 if point.row > lines.row {
353 if !clip {
354 debug_panic!(
355 "point {:?} is beyond this chunk's extent {:?}",
356 point,
357 self.text
358 );
359 }
360 return self.len();
361 }
362
363 let row_offset_range = self.offset_range_for_row(point.row);
364 let line = self.slice(row_offset_range.clone());
365 if point.column > line.last_line_len_utf16() {
366 if !clip {
367 debug_panic!(
368 "point {:?} is beyond the end of the line in chunk {:?}",
369 point,
370 self.text
371 );
372 }
373 return line.len();
374 }
375
376 let mut offset = row_offset_range.start;
377 if point.column > 0 {
378 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
379 if !self.text.is_char_boundary(offset) {
380 offset -= 1;
381 while !self.text.is_char_boundary(offset) {
382 offset -= 1;
383 }
384 if !clip {
385 debug_panic!(
386 "point {:?} is within character in chunk {:?}",
387 point,
388 self.text,
389 );
390 }
391 }
392 }
393 offset
394 }
395
396 #[inline(always)]
397 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
398 let max_point = self.lines();
399 if point.0.row > max_point.row {
400 return max_point;
401 }
402
403 let row_offset_range = self.offset_range_for_row(point.0.row);
404 let line = self.slice(row_offset_range.clone());
405 if point.0.column == 0 {
406 Point::new(point.0.row, 0)
407 } else if point.0.column >= line.len_utf16().0 as u32 {
408 Point::new(point.0.row, line.len() as u32)
409 } else {
410 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
411 while !line.text.is_char_boundary(column) {
412 column -= 1;
413 }
414 Point::new(point.0.row, column as u32)
415 }
416 }
417
418 #[inline(always)]
419 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
420 let max_point = self.lines();
421 if point.row > max_point.row {
422 return max_point;
423 }
424
425 let line = self.slice(self.offset_range_for_row(point.row));
426 if point.column == 0 {
427 point
428 } else if point.column >= line.len() as u32 {
429 Point::new(point.row, line.len() as u32)
430 } else {
431 let mut column = point.column as usize;
432 let bytes = line.text.as_bytes();
433 if bytes[column - 1] < 128 && bytes[column] < 128 {
434 return Point::new(point.row, column as u32);
435 }
436
437 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
438 loop {
439 if line.is_char_boundary(column)
440 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
441 {
442 break;
443 }
444
445 match bias {
446 Bias::Left => column -= 1,
447 Bias::Right => column += 1,
448 }
449 grapheme_cursor.set_cursor(column);
450 }
451 Point::new(point.row, column as u32)
452 }
453 }
454
455 #[inline(always)]
456 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
457 let max_point = self.lines();
458 if point.0.row > max_point.row {
459 PointUtf16::new(max_point.row, self.last_line_len_utf16())
460 } else {
461 let line = self.slice(self.offset_range_for_row(point.0.row));
462 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
463 PointUtf16::new(point.0.row, column.0 as u32)
464 }
465 }
466
467 #[inline(always)]
468 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
469 if target == OffsetUtf16::default() {
470 OffsetUtf16::default()
471 } else if target >= self.len_utf16() {
472 self.len_utf16()
473 } else {
474 let mut offset = self.offset_utf16_to_offset(target);
475 while !self.text.is_char_boundary(offset) {
476 if bias == Bias::Left {
477 offset -= 1;
478 } else {
479 offset += 1;
480 }
481 }
482 self.offset_to_offset_utf16(offset)
483 }
484 }
485
486 #[inline(always)]
487 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
488 let row_start = if row > 0 {
489 nth_set_bit(self.newlines, row as usize) + 1
490 } else {
491 0
492 };
493 let row_len = if row_start == MAX_BASE {
494 0
495 } else {
496 cmp::min(
497 (self.newlines >> row_start).trailing_zeros(),
498 (self.text.len() - row_start) as u32,
499 )
500 };
501 row_start..row_start + row_len as usize
502 }
503
504 #[inline(always)]
505 pub fn tabs(&self) -> Tabs {
506 Tabs {
507 tabs: self.tabs,
508 chars: self.chars,
509 }
510 }
511}
512
/// Iterator over the tab positions of a chunk slice, created by
/// `ChunkSlice::tabs`.
pub struct Tabs {
    /// Bitmap of tabs not yet yielded; bit `i` marks a tab at byte offset `i`.
    tabs: u128,
    /// Bitmap of character starts, used to turn byte offsets into character
    /// offsets.
    chars: u128,
}
517
/// Location of one tab, as yielded by the [`Tabs`] iterator.
#[derive(Debug, PartialEq, Eq)]
pub struct TabPosition {
    /// Byte offset of the tab.
    pub byte_offset: usize,
    /// Number of characters that start before the tab.
    pub char_offset: usize,
}
523
524impl Iterator for Tabs {
525 type Item = TabPosition;
526
527 fn next(&mut self) -> Option<Self::Item> {
528 if self.tabs == 0 {
529 return None;
530 }
531
532 let tab_offset = self.tabs.trailing_zeros() as usize;
533 let chars_mask = (1 << tab_offset) - 1;
534 let char_offset = (self.chars & chars_mask).count_ones() as usize;
535
536 // Since tabs are 1 byte the tab offset is the same as the byte offset
537 let position = TabPosition {
538 byte_offset: tab_offset,
539 char_offset: char_offset,
540 };
541 // Remove the tab we've just seen
542 self.tabs ^= 1 << tab_offset;
543
544 Some(position)
545 }
546}
547
548/// Finds the n-th bit that is set to 1.
549#[inline(always)]
550fn nth_set_bit(v: u128, n: usize) -> usize {
551 let low = v as u64;
552 let high = (v >> 64) as u64;
553
554 let low_count = low.count_ones() as usize;
555 if n > low_count {
556 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
557 } else {
558 nth_set_bit_u64(low, n as u64) as usize
559 }
560}
561
/// Returns the zero-based index, counted from the least significant bit, of
/// the `n`-th set bit of `v`, with `n` being one-based.
///
/// Intended for `n` in `1..=v.count_ones()`; other values are not handled
/// specially.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    // The selection below ranks bits starting from the most significant end,
    // so flip the word to rank from the least significant end instead.
    let v = v.reverse_bits();

    // Parallel population count, keeping every intermediate: set bits per
    // 2-bit, 4-bit, 8-bit and 16-bit group respectively.
    let pairs = v - ((v >> 1) & (u64::MAX / 3));
    let nibbles = (pairs & (u64::MAX / 5)) + ((pairs >> 2) & (u64::MAX / 5));
    let bytes = (nibbles + (nibbles >> 4)) & (u64::MAX / 0x11);
    let halves = (bytes + (bytes >> 8)) & (u64::MAX / 0x101);

    // Branchless descent. At each level `t` is the number of set bits in the
    // upper part of the current window. When `n` exceeds it, `t - n` borrows
    // into bit 8; that bit is used both to move `s` into the lower part and
    // to subtract `t` from `n`.
    let mut s: u64 = 64;

    let t = (halves >> 32) + (halves >> 48);
    let diff = t.wrapping_sub(n);
    s -= (diff & 256) >> 3;
    n -= t & (diff >> 8);

    let t = (halves >> (s - 16)) & 0xff;
    let diff = t.wrapping_sub(n);
    s -= (diff & 256) >> 4;
    n -= t & (diff >> 8);

    let t = (bytes >> (s - 8)) & 0xf;
    let diff = t.wrapping_sub(n);
    s -= (diff & 256) >> 5;
    n -= t & (diff >> 8);

    let t = (nibbles >> (s - 4)) & 0x7;
    let diff = t.wrapping_sub(n);
    s -= (diff & 256) >> 6;
    n -= t & (diff >> 8);

    let t = (pairs >> (s - 2)) & 0x3;
    let diff = t.wrapping_sub(n);
    s -= (diff & 256) >> 7;
    n -= t & (diff >> 8);

    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // `65 - s` is the one-based position from the top of the reversed word;
    // subtracting one more gives the zero-based index in the original word.
    64 - s
}
599
/// Randomized tests that check every `ChunkSlice` conversion and clipping
/// method against values recomputed by walking the text character by
/// character.
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use util::RandomCharIter;

    /// Builds a random chunk, verifies it as a whole, then verifies ten random
    /// sub-slices of it.
    #[gpui::test(iterations = 100)]
    fn test_random_chunks(mut rng: StdRng) {
        let chunk_len = rng.gen_range(0..=MAX_BASE);
        let text = RandomCharIter::new(&mut rng)
            .take(chunk_len)
            .collect::<String>();
        // `chunk_len` characters can take more than `chunk_len` bytes, so cut
        // the text back to at most `chunk_len` bytes on a character boundary.
        let mut ix = chunk_len;
        while !text.is_char_boundary(ix) {
            ix -= 1;
        }
        let text = &text[..ix];

        log::info!("Chunk: {:?}", text);
        let chunk = Chunk::new(&text);
        verify_chunk(chunk.as_slice(), text);

        for _ in 0..10 {
            // Pick a random byte range and snap both ends down to character
            // boundaries.
            let mut start = rng.gen_range(0..=chunk.text.len());
            let mut end = rng.gen_range(start..=chunk.text.len());
            while !chunk.text.is_char_boundary(start) {
                start -= 1;
            }
            while !chunk.text.is_char_boundary(end) {
                end -= 1;
            }
            let range = start..end;
            log::info!("Range: {:?}", range);
            let text_slice = &text[range.clone()];
            let chunk_slice = chunk.slice(range);
            verify_chunk(chunk_slice, text_slice);
        }
    }

    /// Sets a random subset of the 128 bits and checks that `nth_set_bit`
    /// returns the position of each of them, in ascending order.
    #[gpui::test(iterations = 1000)]
    fn test_nth_set_bit_random(mut rng: StdRng) {
        let set_count = rng.gen_range(0..=128);
        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
        set_bits.sort();
        let mut n = 0;
        for ix in set_bits.iter().copied() {
            n |= 1 << ix;
        }

        for (mut ix, position) in set_bits.into_iter().enumerate() {
            // `nth_set_bit` takes a one-based rank.
            ix += 1;
            assert_eq!(
                nth_set_bit(n, ix),
                position,
                "nth_set_bit({:0128b}, {})",
                n,
                ix
            );
        }
    }

    /// Checks every accessor of `chunk` against `text`, the string it is
    /// supposed to represent.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        // Running positions of the character currently being visited, in each
        // of the four coordinate systems.
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        let mut expected_tab_positions = Vec::new();

        for (char_offset, c) in text.chars().enumerate() {
            // Conversions at the start of the character must agree with the
            // running positions.
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A point at the start of a character is already valid, so
            // clipping in either direction must leave it unchanged.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Points inside a multi-byte character clip to its start (left)
            // or to the byte after it (right).
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // Same for UTF-16 positions between the two code units of a
            // surrogate pair.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the running positions past this character.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            // `offset` still refers to the start of this character here.
            if c == '\t' {
                expected_tab_positions.push(TabPosition {
                    byte_offset: offset,
                    char_offset,
                });
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same checks once more at the position just past the last
        // character.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                // Strictly greater, so the earliest of several equally long
                // rows is the expected one.
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // The final row has no trailing newline and is compared separately.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
        assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
    }
}