1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
/// Base size, in bytes, from which the chunk capacity is derived.
///
/// It is 6 in test builds and 64 otherwise (presumably so that tests cross
/// chunk boundaries with very little text — confirm against the rope tests).
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };

/// Capacity, in bytes, of a chunk's text buffer: always twice [`MIN_BASE`].
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;

// Each byte of a chunk is described by one bit of a `u128` bitmap, so the
// capacity must never exceed the bitmap width. Checked at compile time.
const _: () = assert!(MAX_BASE <= u128::BITS as usize);
10
11#[derive(Clone, Debug, Default)]
12pub struct Chunk {
13 chars: u128,
14 chars_utf16: u128,
15 newlines: u128,
16 pub text: ArrayString<MAX_BASE>,
17}
18
19impl Chunk {
20 #[inline(always)]
21 pub fn new(text: &str) -> Self {
22 let mut this = Chunk::default();
23 this.push_str(text);
24 this
25 }
26
27 #[inline(always)]
28 pub fn push_str(&mut self, text: &str) {
29 for (char_ix, c) in text.char_indices() {
30 let ix = self.text.len() + char_ix;
31 self.chars |= 1 << ix;
32 self.chars_utf16 |= 1 << ix;
33 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
34 self.newlines |= ((c == '\n') as u128) << ix;
35 }
36 self.text.push_str(text);
37 }
38
39 #[inline(always)]
40 pub fn append(&mut self, slice: ChunkSlice) {
41 if slice.is_empty() {
42 return;
43 };
44
45 let base_ix = self.text.len();
46 self.chars |= slice.chars << base_ix;
47 self.chars_utf16 |= slice.chars_utf16 << base_ix;
48 self.newlines |= slice.newlines << base_ix;
49 self.text.push_str(&slice.text);
50 }
51
52 #[inline(always)]
53 pub fn as_slice(&self) -> ChunkSlice {
54 ChunkSlice {
55 chars: self.chars,
56 chars_utf16: self.chars_utf16,
57 newlines: self.newlines,
58 text: &self.text,
59 }
60 }
61
62 #[inline(always)]
63 pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
64 self.as_slice().slice(range)
65 }
66}
67
/// A borrowed view of (part of) a [`Chunk`].
///
/// The bitmaps have the same meaning as in [`Chunk`], but are shifted so that
/// bit `i` describes byte `i` of this slice's `text`.
#[derive(Clone, Copy, Debug)]
pub struct ChunkSlice<'a> {
    /// Bit `i` is set when byte `i` is the first byte of a character.
    chars: u128,
    /// One set bit per UTF-16 code unit of the text.
    chars_utf16: u128,
    /// Bit `i` is set when byte `i` is `'\n'`.
    newlines: u128,
    /// The borrowed text.
    text: &'a str,
}
75
76impl<'a> Into<Chunk> for ChunkSlice<'a> {
77 fn into(self) -> Chunk {
78 Chunk {
79 chars: self.chars,
80 chars_utf16: self.chars_utf16,
81 newlines: self.newlines,
82 text: self.text.try_into().unwrap(),
83 }
84 }
85}
86
87impl<'a> ChunkSlice<'a> {
88 #[inline(always)]
89 pub fn is_empty(self) -> bool {
90 self.text.is_empty()
91 }
92
93 #[inline(always)]
94 pub fn is_char_boundary(self, offset: usize) -> bool {
95 self.text.is_char_boundary(offset)
96 }
97
98 #[inline(always)]
99 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
100 if mid == MAX_BASE {
101 let left = self;
102 let right = ChunkSlice {
103 chars: 0,
104 chars_utf16: 0,
105 newlines: 0,
106 text: "",
107 };
108 (left, right)
109 } else {
110 let mask = if mid == MAX_BASE {
111 u128::MAX
112 } else {
113 (1u128 << mid) - 1
114 };
115 let (left_text, right_text) = self.text.split_at(mid);
116 let left = ChunkSlice {
117 chars: self.chars & mask,
118 chars_utf16: self.chars_utf16 & mask,
119 newlines: self.newlines & mask,
120 text: left_text,
121 };
122 let right = ChunkSlice {
123 chars: self.chars >> mid,
124 chars_utf16: self.chars_utf16 >> mid,
125 newlines: self.newlines >> mid,
126 text: right_text,
127 };
128 (left, right)
129 }
130 }
131
132 #[inline(always)]
133 pub fn slice(self, range: Range<usize>) -> Self {
134 let mask = if range.end == MAX_BASE {
135 u128::MAX
136 } else {
137 (1u128 << range.end) - 1
138 };
139 if range.start == MAX_BASE {
140 Self {
141 chars: 0,
142 chars_utf16: 0,
143 newlines: 0,
144 text: "",
145 }
146 } else {
147 Self {
148 chars: (self.chars & mask) >> range.start,
149 chars_utf16: (self.chars_utf16 & mask) >> range.start,
150 newlines: (self.newlines & mask) >> range.start,
151 text: &self.text[range],
152 }
153 }
154 }
155
156 #[inline(always)]
157 pub fn text_summary(&self) -> TextSummary {
158 let (longest_row, longest_row_chars) = self.longest_row();
159 TextSummary {
160 len: self.len(),
161 len_utf16: self.len_utf16(),
162 lines: self.lines(),
163 first_line_chars: self.first_line_chars(),
164 last_line_chars: self.last_line_chars(),
165 last_line_len_utf16: self.last_line_len_utf16(),
166 longest_row,
167 longest_row_chars,
168 }
169 }
170
171 /// Get length in bytes
172 #[inline(always)]
173 pub fn len(&self) -> usize {
174 self.text.len()
175 }
176
177 /// Get length in UTF-16 code units
178 #[inline(always)]
179 pub fn len_utf16(&self) -> OffsetUtf16 {
180 OffsetUtf16(self.chars_utf16.count_ones() as usize)
181 }
182
183 /// Get point representing number of lines and length of last line
184 #[inline(always)]
185 pub fn lines(&self) -> Point {
186 let row = self.newlines.count_ones();
187 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
188 Point::new(row, column)
189 }
190
191 /// Get number of chars in first line
192 #[inline(always)]
193 pub fn first_line_chars(&self) -> u32 {
194 if self.newlines == 0 {
195 self.chars.count_ones()
196 } else {
197 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
198 (self.chars & mask).count_ones()
199 }
200 }
201
202 /// Get number of chars in last line
203 #[inline(always)]
204 pub fn last_line_chars(&self) -> u32 {
205 if self.newlines == 0 {
206 self.chars.count_ones()
207 } else {
208 let mask = !(u128::MAX >> self.newlines.leading_zeros());
209 (self.chars & mask).count_ones()
210 }
211 }
212
213 /// Get number of UTF-16 code units in last line
214 #[inline(always)]
215 pub fn last_line_len_utf16(&self) -> u32 {
216 if self.newlines == 0 {
217 self.chars_utf16.count_ones()
218 } else {
219 let mask = !(u128::MAX >> self.newlines.leading_zeros());
220 (self.chars_utf16 & mask).count_ones()
221 }
222 }
223
224 /// Get the longest row in the chunk and its length in characters.
225 #[inline(always)]
226 pub fn longest_row(&self) -> (u32, u32) {
227 let mut chars = self.chars;
228 let mut newlines = self.newlines;
229 let mut row = 0;
230 let mut longest_row = 0;
231 let mut longest_row_chars = 0;
232 while newlines > 0 {
233 let newline_ix = newlines.trailing_zeros();
234 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
235 if row_chars > longest_row_chars {
236 longest_row = row;
237 longest_row_chars = row_chars;
238 }
239
240 newlines >>= newline_ix;
241 newlines >>= 1;
242 chars >>= newline_ix;
243 chars >>= 1;
244 row += 1;
245 }
246
247 let row_chars = chars.count_ones() as u8;
248 if row_chars > longest_row_chars {
249 (row, row_chars as u32)
250 } else {
251 (longest_row, longest_row_chars as u32)
252 }
253 }
254
255 #[inline(always)]
256 pub fn offset_to_point(&self, offset: usize) -> Point {
257 let mask = if offset == MAX_BASE {
258 u128::MAX
259 } else {
260 (1u128 << offset) - 1
261 };
262 let row = (self.newlines & mask).count_ones();
263 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
264 let column = (offset - newline_ix as usize) as u32;
265 Point::new(row, column)
266 }
267
268 #[inline(always)]
269 pub fn point_to_offset(&self, point: Point) -> usize {
270 if point.row > self.lines().row {
271 debug_panic!(
272 "point {:?} extends beyond rows for string {:?}",
273 point,
274 self.text
275 );
276 return self.len();
277 }
278
279 let row_offset_range = self.offset_range_for_row(point.row);
280 if point.column > row_offset_range.len() as u32 {
281 debug_panic!(
282 "point {:?} extends beyond row for string {:?}",
283 point,
284 self.text
285 );
286 row_offset_range.end
287 } else {
288 row_offset_range.start + point.column as usize
289 }
290 }
291
292 #[inline(always)]
293 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
294 let mask = if offset == MAX_BASE {
295 u128::MAX
296 } else {
297 (1u128 << offset) - 1
298 };
299 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
300 }
301
302 #[inline(always)]
303 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
304 if target.0 == 0 {
305 0
306 } else {
307 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
308 if ix == MAX_BASE {
309 MAX_BASE
310 } else {
311 let utf8_additional_len = cmp::min(
312 (self.chars_utf16 >> ix).trailing_zeros() as usize,
313 self.text.len() - ix,
314 );
315 ix + utf8_additional_len
316 }
317 }
318 }
319
320 #[inline(always)]
321 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
322 let mask = if offset == MAX_BASE {
323 u128::MAX
324 } else {
325 (1u128 << offset) - 1
326 };
327 let row = (self.newlines & mask).count_ones();
328 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
329 let column = if newline_ix as usize == MAX_BASE {
330 0
331 } else {
332 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
333 };
334 PointUtf16::new(row, column)
335 }
336
337 #[inline(always)]
338 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
339 self.offset_to_point_utf16(self.point_to_offset(point))
340 }
341
342 #[inline(always)]
343 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
344 let lines = self.lines();
345 if point.row > lines.row {
346 if !clip {
347 debug_panic!(
348 "point {:?} is beyond this chunk's extent {:?}",
349 point,
350 self.text
351 );
352 }
353 return self.len();
354 }
355
356 let row_offset_range = self.offset_range_for_row(point.row);
357 let line = self.slice(row_offset_range.clone());
358 if point.column > line.last_line_len_utf16() {
359 if !clip {
360 debug_panic!(
361 "point {:?} is beyond the end of the line in chunk {:?}",
362 point,
363 self.text
364 );
365 }
366 return line.len();
367 }
368
369 let mut offset = row_offset_range.start;
370 if point.column > 0 {
371 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
372 if !self.text.is_char_boundary(offset) {
373 offset -= 1;
374 while !self.text.is_char_boundary(offset) {
375 offset -= 1;
376 }
377 if !clip {
378 debug_panic!(
379 "point {:?} is within character in chunk {:?}",
380 point,
381 self.text,
382 );
383 }
384 }
385 }
386 offset
387 }
388
389 #[inline(always)]
390 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
391 let max_point = self.lines();
392 if point.0.row > max_point.row {
393 return max_point;
394 }
395
396 let row_offset_range = self.offset_range_for_row(point.0.row);
397 let line = self.slice(row_offset_range.clone());
398 if point.0.column == 0 {
399 Point::new(point.0.row, 0)
400 } else if point.0.column >= line.len_utf16().0 as u32 {
401 Point::new(point.0.row, line.len() as u32)
402 } else {
403 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
404 while !line.text.is_char_boundary(column) {
405 column -= 1;
406 }
407 Point::new(point.0.row, column as u32)
408 }
409 }
410
411 #[inline(always)]
412 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
413 let max_point = self.lines();
414 if point.row > max_point.row {
415 return max_point;
416 }
417
418 let line = self.slice(self.offset_range_for_row(point.row));
419 if point.column == 0 {
420 point
421 } else if point.column >= line.len() as u32 {
422 Point::new(point.row, line.len() as u32)
423 } else {
424 let mut column = point.column as usize;
425 let bytes = line.text.as_bytes();
426 if bytes[column - 1] < 128 && bytes[column] < 128 {
427 return Point::new(point.row, column as u32);
428 }
429
430 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
431 loop {
432 if line.is_char_boundary(column)
433 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
434 {
435 break;
436 }
437
438 match bias {
439 Bias::Left => column -= 1,
440 Bias::Right => column += 1,
441 }
442 grapheme_cursor.set_cursor(column);
443 }
444 Point::new(point.row, column as u32)
445 }
446 }
447
448 #[inline(always)]
449 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
450 let max_point = self.lines();
451 if point.0.row > max_point.row {
452 PointUtf16::new(max_point.row, self.last_line_len_utf16())
453 } else {
454 let line = self.slice(self.offset_range_for_row(point.0.row));
455 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
456 PointUtf16::new(point.0.row, column.0 as u32)
457 }
458 }
459
460 #[inline(always)]
461 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
462 if target == OffsetUtf16::default() {
463 OffsetUtf16::default()
464 } else if target >= self.len_utf16() {
465 self.len_utf16()
466 } else {
467 let mut offset = self.offset_utf16_to_offset(target);
468 while !self.text.is_char_boundary(offset) {
469 if bias == Bias::Left {
470 offset -= 1;
471 } else {
472 offset += 1;
473 }
474 }
475 self.offset_to_offset_utf16(offset)
476 }
477 }
478
479 #[inline(always)]
480 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
481 let row_start = if row > 0 {
482 nth_set_bit(self.newlines, row as usize) + 1
483 } else {
484 0
485 };
486 let row_len = if row_start == MAX_BASE {
487 0
488 } else {
489 cmp::min(
490 (self.newlines >> row_start).trailing_zeros(),
491 (self.text.len() - row_start) as u32,
492 )
493 };
494 row_start..row_start + row_len as usize
495 }
496}
497
498/// Finds the n-th bit that is set to 1.
499#[inline(always)]
500fn nth_set_bit(v: u128, n: usize) -> usize {
501 let low = v as u64;
502 let high = (v >> 64) as u64;
503
504 let low_count = low.count_ones() as usize;
505 if n > low_count {
506 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
507 } else {
508 nth_set_bit_u64(low, n as u64) as usize
509 }
510}
511
/// Finds the n-th set bit of a `u64` without branching.
///
/// `n` is 1-based and bits are counted from the least significant end; the
/// returned position is 0-based. `n` is expected to be between 1 and
/// `v.count_ones()`; the result for other values of `n` is not meaningful.
///
/// The selection itself ranks bits from the most significant end, so the input
/// is bit-reversed first and the final position is converted back.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    let v = v.reverse_bits();
    let mut s: u64 = 64;

    // Parallel bit count intermediates: set-bit counts of every 2-, 4-, 8- and
    // 16-bit group of `v`.
    let a = v - ((v >> 1) & (u64::MAX / 3));
    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
    let d = (c + (c >> 8)) & (u64::MAX / 0x101);

    // Branchless select. Each step looks at the count `t` of the upper half of
    // the remaining window: when `n > t`, the subtraction `t - n` borrows into
    // bit 8, which moves `s` into the lower half and removes `t` from `n`.
    let t = (d >> 32) + (d >> 48);
    s -= (t.wrapping_sub(n) & 256) >> 3;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (d >> (s - 16)) & 0xff;
    s -= (t.wrapping_sub(n) & 256) >> 4;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (c >> (s - 8)) & 0xf;
    s -= (t.wrapping_sub(n) & 256) >> 5;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (b >> (s - 4)) & 0x7;
    s -= (t.wrapping_sub(n) & 256) >> 6;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (a >> (s - 2)) & 0x3;
    s -= (t.wrapping_sub(n) & 256) >> 7;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // `65 - s` is the 1-based position from the most significant end of the
    // reversed value; one less is the 0-based position in the original value.
    64 - s
}
549
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use util::RandomCharIter;

    /// Builds a random chunk and checks it, and ten random sub-slices of it,
    /// against a straightforward character-by-character model.
    #[gpui::test(iterations = 100)]
    fn test_random_chunks(mut rng: StdRng) {
        let chunk_len = rng.gen_range(0..=MAX_BASE);
        let text = RandomCharIter::new(&mut rng)
            .take(chunk_len)
            .collect::<String>();
        // `chunk_len` characters may take more than `chunk_len` bytes, so cut
        // the text back to the last character boundary that still fits.
        let mut ix = chunk_len;
        while !text.is_char_boundary(ix) {
            ix -= 1;
        }
        let text = &text[..ix];

        log::info!("Chunk: {:?}", text);
        let chunk = Chunk::new(text);
        verify_chunk(chunk.as_slice(), text);

        for _ in 0..10 {
            let mut start = rng.gen_range(0..=chunk.text.len());
            let mut end = rng.gen_range(start..=chunk.text.len());
            while !chunk.text.is_char_boundary(start) {
                start -= 1;
            }
            while !chunk.text.is_char_boundary(end) {
                end -= 1;
            }
            let range = start..end;
            log::info!("Range: {:?}", range);
            let text_slice = &text[range.clone()];
            let chunk_slice = chunk.slice(range);
            verify_chunk(chunk_slice, text_slice);
        }
    }

    /// Checks `nth_set_bit` against a randomly chosen, sorted set of bit
    /// positions.
    #[gpui::test(iterations = 1000)]
    fn test_nth_set_bit_random(mut rng: StdRng) {
        let set_count = rng.gen_range(0..=128);
        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
        set_bits.sort();
        let mut n = 0;
        for ix in set_bits.iter().copied() {
            n |= 1 << ix;
        }

        for (mut ix, position) in set_bits.into_iter().enumerate() {
            // `nth_set_bit` takes a 1-based rank.
            ix += 1;
            assert_eq!(
                nth_set_bit(n, ix),
                position,
                "nth_set_bit({:0128b}, {})",
                n,
                ix
            );
        }
    }

    /// Walks `text` one character at a time while tracking the expected
    /// offsets and points, and checks every conversion and clipping method of
    /// `chunk` against them.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        for c in text.chars() {
            // Conversions at the start of every character.
            let actual_point = chunk.offset_to_point(offset);
            assert_eq!(point, actual_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A character start is already a valid position in both directions.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Positions inside a multi-byte character clip to one of its ends.
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // Likewise for positions between the two halves of a surrogate pair.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the model past `c`.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // The same checks at the very end of the text.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        // Account for the row after the last newline.
        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
    }
}