1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
8pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
9pub(crate) const MAX_BASE: usize = MIN_BASE * 2;
10
11#[derive(Clone, Debug, Default)]
12pub struct Chunk {
13 /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
14 /// text.
15 chars: u128,
16 /// The number of set bits is the number of UTF-16 code units it would take to represent the
17 /// text.
18 ///
19 /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
20 /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
21 /// than two UTF-16 code units.)
22 chars_utf16: u128,
23 /// If bit[i] is set, then the character at index i is an ascii newline.
24 newlines: u128,
25 /// If bit[i] is set, then the character at index i is an ascii tab.
26 pub tabs: u128,
27 pub text: ArrayString<MAX_BASE>,
28}
29
30impl Chunk {
31 #[inline(always)]
32 pub fn new(text: &str) -> Self {
33 let mut this = Chunk::default();
34 this.push_str(text);
35 this
36 }
37
38 #[inline(always)]
39 pub fn push_str(&mut self, text: &str) {
40 for (char_ix, c) in text.char_indices() {
41 let ix = self.text.len() + char_ix;
42 self.chars |= 1 << ix;
43 self.chars_utf16 |= 1 << ix;
44 self.chars_utf16 |= (c.len_utf16() as u128) << ix;
45 self.newlines |= ((c == '\n') as u128) << ix;
46 self.tabs |= ((c == '\t') as u128) << ix;
47 }
48 self.text.push_str(text);
49 }
50
51 #[inline(always)]
52 pub fn append(&mut self, slice: ChunkSlice) {
53 if slice.is_empty() {
54 return;
55 };
56
57 let base_ix = self.text.len();
58 self.chars |= slice.chars << base_ix;
59 self.chars_utf16 |= slice.chars_utf16 << base_ix;
60 self.newlines |= slice.newlines << base_ix;
61 self.tabs |= slice.tabs << base_ix;
62 self.text.push_str(slice.text);
63 }
64
65 #[inline(always)]
66 pub fn as_slice(&self) -> ChunkSlice<'_> {
67 ChunkSlice {
68 chars: self.chars,
69 chars_utf16: self.chars_utf16,
70 newlines: self.newlines,
71 tabs: self.tabs,
72 text: &self.text,
73 }
74 }
75
76 #[inline(always)]
77 pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
78 self.as_slice().slice(range)
79 }
80
81 #[inline(always)]
82 pub const fn chars(&self) -> u128 {
83 self.chars
84 }
85}
86
87#[derive(Clone, Copy, Debug)]
88pub struct ChunkSlice<'a> {
89 chars: u128,
90 chars_utf16: u128,
91 newlines: u128,
92 tabs: u128,
93 text: &'a str,
94}
95
96impl Into<Chunk> for ChunkSlice<'_> {
97 fn into(self) -> Chunk {
98 Chunk {
99 chars: self.chars,
100 chars_utf16: self.chars_utf16,
101 newlines: self.newlines,
102 tabs: self.tabs,
103 text: self.text.try_into().unwrap(),
104 }
105 }
106}
107
108impl<'a> ChunkSlice<'a> {
109 #[inline(always)]
110 pub const fn is_empty(&self) -> bool {
111 self.text.is_empty()
112 }
113
114 #[inline(always)]
115 pub const fn is_char_boundary(self, offset: usize) -> bool {
116 self.text.is_char_boundary(offset)
117 }
118
119 #[inline(always)]
120 pub const fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
121 if mid == MAX_BASE {
122 let left = self;
123 let right = ChunkSlice {
124 chars: 0,
125 chars_utf16: 0,
126 newlines: 0,
127 tabs: 0,
128 text: "",
129 };
130 (left, right)
131 } else {
132 let mask = (1u128 << mid) - 1;
133 let (left_text, right_text) = self.text.split_at(mid);
134 let left = ChunkSlice {
135 chars: self.chars & mask,
136 chars_utf16: self.chars_utf16 & mask,
137 newlines: self.newlines & mask,
138 tabs: self.tabs & mask,
139 text: left_text,
140 };
141 let right = ChunkSlice {
142 chars: self.chars >> mid,
143 chars_utf16: self.chars_utf16 >> mid,
144 newlines: self.newlines >> mid,
145 tabs: self.tabs >> mid,
146 text: right_text,
147 };
148 (left, right)
149 }
150 }
151
152 #[inline(always)]
153 pub fn slice(self, range: Range<usize>) -> Self {
154 let mask = if range.end == MAX_BASE {
155 u128::MAX
156 } else {
157 debug_assert!(
158 self.is_char_boundary(range.end),
159 "Invalid range end {} in {:?}",
160 range.end,
161 self
162 );
163 (1u128 << range.end) - 1
164 };
165 if range.start == MAX_BASE {
166 Self {
167 chars: 0,
168 chars_utf16: 0,
169 newlines: 0,
170 tabs: 0,
171 text: "",
172 }
173 } else {
174 debug_assert!(
175 self.is_char_boundary(range.start),
176 "Invalid range start {} in {:?}",
177 range.start,
178 self
179 );
180 Self {
181 chars: (self.chars & mask) >> range.start,
182 chars_utf16: (self.chars_utf16 & mask) >> range.start,
183 newlines: (self.newlines & mask) >> range.start,
184 tabs: (self.tabs & mask) >> range.start,
185 text: &self.text[range],
186 }
187 }
188 }
189
190 #[inline(always)]
191 pub fn text_summary(&self) -> TextSummary {
192 let mut chars = 0;
193 let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
194 TextSummary {
195 len: self.len(),
196 chars,
197 len_utf16: self.len_utf16(),
198 lines: self.lines(),
199 first_line_chars: self.first_line_chars(),
200 last_line_chars: self.last_line_chars(),
201 last_line_len_utf16: self.last_line_len_utf16(),
202 longest_row,
203 longest_row_chars,
204 }
205 }
206
207 /// Get length in bytes
208 #[inline(always)]
209 pub const fn len(&self) -> usize {
210 self.text.len()
211 }
212
213 /// Get length in UTF-16 code units
214 #[inline(always)]
215 pub const fn len_utf16(&self) -> OffsetUtf16 {
216 OffsetUtf16(self.chars_utf16.count_ones() as usize)
217 }
218
219 /// Get point representing number of lines and length of last line
220 #[inline(always)]
221 pub const fn lines(&self) -> Point {
222 let row = self.newlines.count_ones();
223 let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
224 Point::new(row, column)
225 }
226
227 /// Get number of chars in first line
228 #[inline(always)]
229 pub const fn first_line_chars(&self) -> u32 {
230 if self.newlines == 0 {
231 self.chars.count_ones()
232 } else {
233 let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
234 (self.chars & mask).count_ones()
235 }
236 }
237
238 /// Get number of chars in last line
239 #[inline(always)]
240 pub const fn last_line_chars(&self) -> u32 {
241 if self.newlines == 0 {
242 self.chars.count_ones()
243 } else {
244 let mask = !(u128::MAX >> self.newlines.leading_zeros());
245 (self.chars & mask).count_ones()
246 }
247 }
248
249 /// Get number of UTF-16 code units in last line
250 #[inline(always)]
251 pub const fn last_line_len_utf16(&self) -> u32 {
252 if self.newlines == 0 {
253 self.chars_utf16.count_ones()
254 } else {
255 let mask = !(u128::MAX >> self.newlines.leading_zeros());
256 (self.chars_utf16 & mask).count_ones()
257 }
258 }
259
260 /// Get the longest row in the chunk and its length in characters.
261 /// Calculate the total number of characters in the chunk along the way.
262 #[inline(always)]
263 pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
264 let mut chars = self.chars;
265 let mut newlines = self.newlines;
266 *total_chars = 0;
267 let mut row = 0;
268 let mut longest_row = 0;
269 let mut longest_row_chars = 0;
270 while newlines > 0 {
271 let newline_ix = newlines.trailing_zeros();
272 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
273 *total_chars += usize::from(row_chars);
274 if row_chars > longest_row_chars {
275 longest_row = row;
276 longest_row_chars = row_chars;
277 }
278
279 newlines >>= newline_ix;
280 newlines >>= 1;
281 chars >>= newline_ix;
282 chars >>= 1;
283 row += 1;
284 *total_chars += 1;
285 }
286
287 let row_chars = chars.count_ones() as u8;
288 *total_chars += usize::from(row_chars);
289 if row_chars > longest_row_chars {
290 (row, row_chars as u32)
291 } else {
292 (longest_row, longest_row_chars as u32)
293 }
294 }
295
296 #[inline(always)]
297 pub const fn offset_to_point(&self, offset: usize) -> Point {
298 let mask = if offset == MAX_BASE {
299 u128::MAX
300 } else {
301 (1u128 << offset) - 1
302 };
303 let row = (self.newlines & mask).count_ones();
304 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
305 let column = (offset - newline_ix as usize) as u32;
306 Point::new(row, column)
307 }
308
309 #[inline(always)]
310 pub fn point_to_offset(&self, point: Point) -> usize {
311 if point.row > self.lines().row {
312 debug_panic!(
313 "point {:?} extends beyond rows for string {:?}",
314 point,
315 self.text
316 );
317 return self.len();
318 }
319
320 let row_offset_range = self.offset_range_for_row(point.row);
321 if point.column > row_offset_range.len() as u32 {
322 debug_panic!(
323 "point {:?} extends beyond row for string {:?}",
324 point,
325 self.text
326 );
327 row_offset_range.end
328 } else {
329 row_offset_range.start + point.column as usize
330 }
331 }
332
333 #[inline(always)]
334 pub const fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
335 let mask = if offset == MAX_BASE {
336 u128::MAX
337 } else {
338 (1u128 << offset) - 1
339 };
340 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
341 }
342
343 #[inline(always)]
344 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
345 if target.0 == 0 {
346 0
347 } else {
348 let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
349 if ix == MAX_BASE {
350 MAX_BASE
351 } else {
352 let utf8_additional_len = cmp::min(
353 (self.chars_utf16 >> ix).trailing_zeros() as usize,
354 self.text.len() - ix,
355 );
356 ix + utf8_additional_len
357 }
358 }
359 }
360
361 #[inline(always)]
362 pub const fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
363 let mask = if offset == MAX_BASE {
364 u128::MAX
365 } else {
366 (1u128 << offset) - 1
367 };
368 let row = (self.newlines & mask).count_ones();
369 let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
370 let column = if newline_ix as usize == MAX_BASE {
371 0
372 } else {
373 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
374 };
375 PointUtf16::new(row, column)
376 }
377
378 #[inline(always)]
379 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
380 self.offset_to_point_utf16(self.point_to_offset(point))
381 }
382
383 #[inline(always)]
384 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
385 let lines = self.lines();
386 if point.row > lines.row {
387 if !clip {
388 debug_panic!(
389 "point {:?} is beyond this chunk's extent {:?}",
390 point,
391 self.text
392 );
393 }
394 return self.len();
395 }
396
397 let row_offset_range = self.offset_range_for_row(point.row);
398 let line = self.slice(row_offset_range.clone());
399 if point.column > line.last_line_len_utf16() {
400 if !clip {
401 debug_panic!(
402 "point {:?} is beyond the end of the line in chunk {:?}",
403 point,
404 self.text
405 );
406 }
407 return line.len();
408 }
409
410 let mut offset = row_offset_range.start;
411 if point.column > 0 {
412 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
413 if !self.text.is_char_boundary(offset) {
414 offset -= 1;
415 while !self.text.is_char_boundary(offset) {
416 offset -= 1;
417 }
418 if !clip {
419 debug_panic!(
420 "point {:?} is within character in chunk {:?}",
421 point,
422 self.text,
423 );
424 }
425 }
426 }
427 offset
428 }
429
430 #[inline(always)]
431 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
432 let max_point = self.lines();
433 if point.0.row > max_point.row {
434 return max_point;
435 }
436
437 let row_offset_range = self.offset_range_for_row(point.0.row);
438 let line = self.slice(row_offset_range);
439 if point.0.column == 0 {
440 Point::new(point.0.row, 0)
441 } else if point.0.column >= line.len_utf16().0 as u32 {
442 Point::new(point.0.row, line.len() as u32)
443 } else {
444 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
445 while !line.text.is_char_boundary(column) {
446 column -= 1;
447 }
448 Point::new(point.0.row, column as u32)
449 }
450 }
451
452 #[inline(always)]
453 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
454 let max_point = self.lines();
455 if point.row > max_point.row {
456 return max_point;
457 }
458
459 let line = self.slice(self.offset_range_for_row(point.row));
460 if point.column == 0 {
461 point
462 } else if point.column >= line.len() as u32 {
463 Point::new(point.row, line.len() as u32)
464 } else {
465 let mut column = point.column as usize;
466 let bytes = line.text.as_bytes();
467 if bytes[column - 1] < 128 && bytes[column] < 128 {
468 return Point::new(point.row, column as u32);
469 }
470
471 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
472 loop {
473 if line.is_char_boundary(column)
474 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
475 {
476 break;
477 }
478
479 match bias {
480 Bias::Left => column -= 1,
481 Bias::Right => column += 1,
482 }
483 grapheme_cursor.set_cursor(column);
484 }
485 Point::new(point.row, column as u32)
486 }
487 }
488
489 #[inline(always)]
490 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
491 let max_point = self.lines();
492 if point.0.row > max_point.row {
493 PointUtf16::new(max_point.row, self.last_line_len_utf16())
494 } else {
495 let line = self.slice(self.offset_range_for_row(point.0.row));
496 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
497 PointUtf16::new(point.0.row, column.0 as u32)
498 }
499 }
500
501 #[inline(always)]
502 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
503 if target == OffsetUtf16::default() {
504 OffsetUtf16::default()
505 } else if target >= self.len_utf16() {
506 self.len_utf16()
507 } else {
508 let mut offset = self.offset_utf16_to_offset(target);
509 while !self.text.is_char_boundary(offset) {
510 if bias == Bias::Left {
511 offset -= 1;
512 } else {
513 offset += 1;
514 }
515 }
516 self.offset_to_offset_utf16(offset)
517 }
518 }
519
520 #[inline(always)]
521 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
522 let row_start = if row > 0 {
523 nth_set_bit(self.newlines, row as usize) + 1
524 } else {
525 0
526 };
527 let row_len = if row_start == MAX_BASE {
528 0
529 } else {
530 cmp::min(
531 (self.newlines >> row_start).trailing_zeros(),
532 (self.text.len() - row_start) as u32,
533 )
534 };
535 row_start..row_start + row_len as usize
536 }
537
538 #[inline(always)]
539 pub const fn tabs(&self) -> Tabs {
540 Tabs {
541 tabs: self.tabs,
542 chars: self.chars,
543 }
544 }
545}
546
547pub struct Tabs {
548 tabs: u128,
549 chars: u128,
550}
551
552#[derive(Debug, PartialEq, Eq)]
553pub struct TabPosition {
554 pub byte_offset: usize,
555 pub char_offset: usize,
556}
557
558impl Iterator for Tabs {
559 type Item = TabPosition;
560
561 fn next(&mut self) -> Option<Self::Item> {
562 if self.tabs == 0 {
563 return None;
564 }
565
566 let tab_offset = self.tabs.trailing_zeros() as usize;
567 let chars_mask = (1 << tab_offset) - 1;
568 let char_offset = (self.chars & chars_mask).count_ones() as usize;
569
570 // Since tabs are 1 byte the tab offset is the same as the byte offset
571 let position = TabPosition {
572 byte_offset: tab_offset,
573 char_offset,
574 };
575 // Remove the tab we've just seen
576 self.tabs ^= 1 << tab_offset;
577
578 Some(position)
579 }
580}
581
582/// Finds the n-th bit that is set to 1.
583#[inline(always)]
584const fn nth_set_bit(v: u128, n: usize) -> usize {
585 let low = v as u64;
586 let high = (v >> 64) as u64;
587
588 let low_count = low.count_ones() as usize;
589 if n > low_count {
590 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
591 } else {
592 nth_set_bit_u64(low, n as u64) as usize
593 }
594}
595
596#[inline(always)]
597const fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
598 let v = v.reverse_bits();
599 let mut s: u64 = 64;
600
601 // Parallel bit count intermediates
602 let a = v - ((v >> 1) & (u64::MAX / 3));
603 let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
604 let c = (b + (b >> 4)) & (u64::MAX / 0x11);
605 let d = (c + (c >> 8)) & (u64::MAX / 0x101);
606
607 // Branchless select
608 let t = (d >> 32) + (d >> 48);
609 s -= (t.wrapping_sub(n) & 256) >> 3;
610 n -= t & (t.wrapping_sub(n) >> 8);
611
612 let t = (d >> (s - 16)) & 0xff;
613 s -= (t.wrapping_sub(n) & 256) >> 4;
614 n -= t & (t.wrapping_sub(n) >> 8);
615
616 let t = (c >> (s - 8)) & 0xf;
617 s -= (t.wrapping_sub(n) & 256) >> 5;
618 n -= t & (t.wrapping_sub(n) >> 8);
619
620 let t = (b >> (s - 4)) & 0x7;
621 s -= (t.wrapping_sub(n) & 256) >> 6;
622 n -= t & (t.wrapping_sub(n) >> 8);
623
624 let t = (a >> (s - 2)) & 0x3;
625 s -= (t.wrapping_sub(n) & 256) >> 7;
626 n -= t & (t.wrapping_sub(n) >> 8);
627
628 let t = (v >> (s - 1)) & 0x1;
629 s -= (t.wrapping_sub(n) & 256) >> 8;
630
631 65 - s - 1
632}
633
634#[cfg(test)]
635mod tests {
636 use super::*;
637 use rand::prelude::*;
638 use util::RandomCharIter;
639
640 #[gpui::test(iterations = 100)]
641 fn test_random_chunks(mut rng: StdRng) {
642 let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
643 log::info!("Chunk: {:?}", text);
644 let chunk = Chunk::new(&text);
645 verify_chunk(chunk.as_slice(), &text);
646
647 // Verify Chunk::chars() bitmap
648 let expected_chars = char_offsets(&text)
649 .into_iter()
650 .inspect(|i| assert!(*i < 128))
651 .fold(0u128, |acc, i| acc | (1 << i));
652 assert_eq!(chunk.chars(), expected_chars);
653
654 for _ in 0..10 {
655 let mut start = rng.random_range(0..=chunk.text.len());
656 let mut end = rng.random_range(start..=chunk.text.len());
657 while !chunk.text.is_char_boundary(start) {
658 start -= 1;
659 }
660 while !chunk.text.is_char_boundary(end) {
661 end -= 1;
662 }
663 let range = start..end;
664 log::info!("Range: {:?}", range);
665 let text_slice = &text[range.clone()];
666 let chunk_slice = chunk.slice(range);
667 verify_chunk(chunk_slice, text_slice);
668 }
669 }
670
671 #[gpui::test(iterations = 100)]
672 fn test_split_chunk_slice(mut rng: StdRng) {
673 let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
674 let chunk = Chunk::new(text);
675 let offset = char_offsets_with_end(text)
676 .into_iter()
677 .choose(&mut rng)
678 .unwrap();
679 let (a, b) = chunk.as_slice().split_at(offset);
680 let (a_str, b_str) = text.split_at(offset);
681 verify_chunk(a, a_str);
682 verify_chunk(b, b_str);
683 }
684
685 #[gpui::test(iterations = 1000)]
686 fn test_nth_set_bit_random(mut rng: StdRng) {
687 let set_count = rng.random_range(0..=128);
688 let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
689 set_bits.sort();
690 let mut n = 0;
691 for ix in set_bits.iter().copied() {
692 n |= 1 << ix;
693 }
694
695 for (mut ix, position) in set_bits.into_iter().enumerate() {
696 ix += 1;
697 assert_eq!(
698 nth_set_bit(n, ix),
699 position,
700 "nth_set_bit({:0128b}, {})",
701 n,
702 ix
703 );
704 }
705 }
706
707 /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
708 fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
709 let mut str = String::new();
710 let mut chars = RandomCharIter::new(rng);
711 loop {
712 let ch = chars.next().unwrap();
713 if str.len() + ch.len_utf8() > len {
714 break;
715 }
716 str.push(ch);
717 }
718 str
719 }
720
721 #[gpui::test(iterations = 1000)]
722 fn test_append_random_strings(mut rng: StdRng) {
723 let len1 = rng.random_range(0..=MAX_BASE);
724 let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
725 let str1 = random_string_with_utf8_len(&mut rng, len1);
726 let str2 = random_string_with_utf8_len(&mut rng, len2);
727 let mut chunk1 = Chunk::new(&str1);
728 let chunk2 = Chunk::new(&str2);
729 let char_offsets = char_offsets_with_end(&str2);
730 let start_index = rng.random_range(0..char_offsets.len());
731 let start_offset = char_offsets[start_index];
732 let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
733 chunk1.append(chunk2.slice(start_offset..end_offset));
734 verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
735 }
736
737 /// Return the byte offsets for each character in a string.
738 ///
739 /// These are valid offsets to split the string.
740 fn char_offsets(text: &str) -> Vec<usize> {
741 text.char_indices().map(|(i, _c)| i).collect()
742 }
743
744 /// Return the byte offsets for each character in a string, plus the offset
745 /// past the end of the string.
746 fn char_offsets_with_end(text: &str) -> Vec<usize> {
747 let mut v = char_offsets(text);
748 v.push(text.len());
749 v
750 }
751
752 fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
753 let mut offset = 0;
754 let mut offset_utf16 = OffsetUtf16(0);
755 let mut point = Point::zero();
756 let mut point_utf16 = PointUtf16::zero();
757
758 log::info!("Verifying chunk {:?}", text);
759 assert_eq!(chunk.offset_to_point(0), Point::zero());
760
761 let mut expected_tab_positions = Vec::new();
762
763 for (char_offset, c) in text.chars().enumerate() {
764 let expected_point = chunk.offset_to_point(offset);
765 assert_eq!(point, expected_point, "mismatch at offset {}", offset);
766 assert_eq!(
767 chunk.point_to_offset(point),
768 offset,
769 "mismatch at point {:?}",
770 point
771 );
772 assert_eq!(
773 chunk.offset_to_offset_utf16(offset),
774 offset_utf16,
775 "mismatch at offset {}",
776 offset
777 );
778 assert_eq!(
779 chunk.offset_utf16_to_offset(offset_utf16),
780 offset,
781 "mismatch at offset_utf16 {:?}",
782 offset_utf16
783 );
784 assert_eq!(
785 chunk.point_to_point_utf16(point),
786 point_utf16,
787 "mismatch at point {:?}",
788 point
789 );
790 assert_eq!(
791 chunk.point_utf16_to_offset(point_utf16, false),
792 offset,
793 "mismatch at point_utf16 {:?}",
794 point_utf16
795 );
796 assert_eq!(
797 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
798 point,
799 "mismatch for unclipped_point_utf16_to_point at {:?}",
800 point_utf16
801 );
802
803 assert_eq!(
804 chunk.clip_point(point, Bias::Left),
805 point,
806 "incorrect left clip at {:?}",
807 point
808 );
809 assert_eq!(
810 chunk.clip_point(point, Bias::Right),
811 point,
812 "incorrect right clip at {:?}",
813 point
814 );
815
816 for i in 1..c.len_utf8() {
817 let test_point = Point::new(point.row, point.column + i as u32);
818 assert_eq!(
819 chunk.clip_point(test_point, Bias::Left),
820 point,
821 "incorrect left clip within multi-byte char at {:?}",
822 test_point
823 );
824 assert_eq!(
825 chunk.clip_point(test_point, Bias::Right),
826 Point::new(point.row, point.column + c.len_utf8() as u32),
827 "incorrect right clip within multi-byte char at {:?}",
828 test_point
829 );
830 }
831
832 for i in 1..c.len_utf16() {
833 let test_point = Unclipped(PointUtf16::new(
834 point_utf16.row,
835 point_utf16.column + i as u32,
836 ));
837 assert_eq!(
838 chunk.unclipped_point_utf16_to_point(test_point),
839 point,
840 "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
841 test_point
842 );
843 assert_eq!(
844 chunk.clip_point_utf16(test_point, Bias::Left),
845 point_utf16,
846 "incorrect left clip_point_utf16 within multi-byte char at {:?}",
847 test_point
848 );
849 assert_eq!(
850 chunk.clip_point_utf16(test_point, Bias::Right),
851 PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
852 "incorrect right clip_point_utf16 within multi-byte char at {:?}",
853 test_point
854 );
855
856 let test_offset = OffsetUtf16(offset_utf16.0 + i);
857 assert_eq!(
858 chunk.clip_offset_utf16(test_offset, Bias::Left),
859 offset_utf16,
860 "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
861 test_offset
862 );
863 assert_eq!(
864 chunk.clip_offset_utf16(test_offset, Bias::Right),
865 OffsetUtf16(offset_utf16.0 + c.len_utf16()),
866 "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
867 test_offset
868 );
869 }
870
871 if c == '\n' {
872 point.row += 1;
873 point.column = 0;
874 point_utf16.row += 1;
875 point_utf16.column = 0;
876 } else {
877 point.column += c.len_utf8() as u32;
878 point_utf16.column += c.len_utf16() as u32;
879 }
880
881 if c == '\t' {
882 expected_tab_positions.push(TabPosition {
883 byte_offset: offset,
884 char_offset,
885 });
886 }
887
888 offset += c.len_utf8();
889 offset_utf16.0 += c.len_utf16();
890 }
891
892 let final_point = chunk.offset_to_point(offset);
893 assert_eq!(point, final_point, "mismatch at final offset {}", offset);
894 assert_eq!(
895 chunk.point_to_offset(point),
896 offset,
897 "mismatch at point {:?}",
898 point
899 );
900 assert_eq!(
901 chunk.offset_to_offset_utf16(offset),
902 offset_utf16,
903 "mismatch at offset {}",
904 offset
905 );
906 assert_eq!(
907 chunk.offset_utf16_to_offset(offset_utf16),
908 offset,
909 "mismatch at offset_utf16 {:?}",
910 offset_utf16
911 );
912 assert_eq!(
913 chunk.point_to_point_utf16(point),
914 point_utf16,
915 "mismatch at final point {:?}",
916 point
917 );
918 assert_eq!(
919 chunk.point_utf16_to_offset(point_utf16, false),
920 offset,
921 "mismatch at final point_utf16 {:?}",
922 point_utf16
923 );
924 assert_eq!(
925 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
926 point,
927 "mismatch for unclipped_point_utf16_to_point at final point {:?}",
928 point_utf16
929 );
930 assert_eq!(
931 chunk.clip_point(point, Bias::Left),
932 point,
933 "incorrect left clip at final point {:?}",
934 point
935 );
936 assert_eq!(
937 chunk.clip_point(point, Bias::Right),
938 point,
939 "incorrect right clip at final point {:?}",
940 point
941 );
942 assert_eq!(
943 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
944 point_utf16,
945 "incorrect left clip_point_utf16 at final point {:?}",
946 point_utf16
947 );
948 assert_eq!(
949 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
950 point_utf16,
951 "incorrect right clip_point_utf16 at final point {:?}",
952 point_utf16
953 );
954 assert_eq!(
955 chunk.clip_offset_utf16(offset_utf16, Bias::Left),
956 offset_utf16,
957 "incorrect left clip_offset_utf16 at final offset {:?}",
958 offset_utf16
959 );
960 assert_eq!(
961 chunk.clip_offset_utf16(offset_utf16, Bias::Right),
962 offset_utf16,
963 "incorrect right clip_offset_utf16 at final offset {:?}",
964 offset_utf16
965 );
966
967 // Verify length methods
968 assert_eq!(chunk.len(), text.len());
969 assert_eq!(
970 chunk.len_utf16().0,
971 text.chars().map(|c| c.len_utf16()).sum::<usize>()
972 );
973
974 // Verify line counting
975 let lines = chunk.lines();
976 let mut newline_count = 0;
977 let mut last_line_len = 0;
978 for c in text.chars() {
979 if c == '\n' {
980 newline_count += 1;
981 last_line_len = 0;
982 } else {
983 last_line_len += c.len_utf8() as u32;
984 }
985 }
986 assert_eq!(lines, Point::new(newline_count, last_line_len));
987
988 // Verify first/last line chars
989 if !text.is_empty() {
990 let first_line = text.split('\n').next().unwrap();
991 assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
992
993 let last_line = text.split('\n').next_back().unwrap();
994 assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
995 assert_eq!(
996 chunk.last_line_len_utf16(),
997 last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
998 );
999 }
1000
1001 // Verify longest row
1002 let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1003 let mut max_chars = 0;
1004 let mut current_row = 0;
1005 let mut current_chars = 0;
1006 let mut max_row = 0;
1007
1008 for c in text.chars() {
1009 if c == '\n' {
1010 if current_chars > max_chars {
1011 max_chars = current_chars;
1012 max_row = current_row;
1013 }
1014 current_row += 1;
1015 current_chars = 0;
1016 } else {
1017 current_chars += 1;
1018 }
1019 }
1020
1021 if current_chars > max_chars {
1022 max_chars = current_chars;
1023 max_row = current_row;
1024 }
1025
1026 assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1027 assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1028 }
1029}