1use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
2use arrayvec::ArrayString;
3use std::{cmp, ops::Range};
4use sum_tree::Bias;
5use unicode_segmentation::GraphemeCursor;
6use util::debug_panic;
7
8#[cfg(not(all(test, not(rust_analyzer))))]
9pub(crate) type Bitmap = u128;
10#[cfg(all(test, not(rust_analyzer)))]
11pub(crate) type Bitmap = u16;
12
13pub(crate) const MIN_BASE: usize = MAX_BASE / 2;
14pub(crate) const MAX_BASE: usize = Bitmap::BITS as usize;
15
16#[derive(Clone, Debug, Default)]
17pub struct Chunk {
18 /// If bit[i] is set, then the character at index i is the start of a UTF-8 character in the
19 /// text.
20 chars: Bitmap,
21 /// The number of set bits is the number of UTF-16 code units it would take to represent the
22 /// text.
23 ///
24 /// Bit[i] is set if text[i] is the start of a UTF-8 character. If the character would
25 /// take two UTF-16 code units, then bit[i+1] is also set. (Rust chars never take more
26 /// than two UTF-16 code units.)
27 chars_utf16: Bitmap,
28 /// If bit[i] is set, then the character at index i is an ascii newline.
29 newlines: Bitmap,
30 /// If bit[i] is set, then the character at index i is an ascii tab.
31 tabs: Bitmap,
32 pub text: ArrayString<MAX_BASE>,
33}
34
35impl Chunk {
36 pub const MASK_BITS: usize = Bitmap::BITS as usize;
37
38 #[inline(always)]
39 pub fn new(text: &str) -> Self {
40 let mut this = Chunk::default();
41 this.push_str(text);
42 this
43 }
44
45 #[inline(always)]
46 pub fn push_str(&mut self, text: &str) {
47 for (char_ix, c) in text.char_indices() {
48 let ix = self.text.len() + char_ix;
49 self.chars |= 1 << ix;
50 self.chars_utf16 |= 1 << ix;
51 self.chars_utf16 |= (c.len_utf16() as Bitmap) << ix;
52 self.newlines |= ((c == '\n') as Bitmap) << ix;
53 self.tabs |= ((c == '\t') as Bitmap) << ix;
54 }
55 self.text.push_str(text);
56 }
57
58 #[inline(always)]
59 pub fn append(&mut self, slice: ChunkSlice) {
60 if slice.is_empty() {
61 return;
62 };
63
64 let base_ix = self.text.len();
65 self.chars |= slice.chars << base_ix;
66 self.chars_utf16 |= slice.chars_utf16 << base_ix;
67 self.newlines |= slice.newlines << base_ix;
68 self.tabs |= slice.tabs << base_ix;
69 self.text.push_str(slice.text);
70 }
71
72 #[inline(always)]
73 pub fn as_slice(&self) -> ChunkSlice<'_> {
74 ChunkSlice {
75 chars: self.chars,
76 chars_utf16: self.chars_utf16,
77 newlines: self.newlines,
78 tabs: self.tabs,
79 text: &self.text,
80 }
81 }
82
83 #[inline(always)]
84 pub fn slice(&self, range: Range<usize>) -> ChunkSlice<'_> {
85 self.as_slice().slice(range)
86 }
87
88 #[inline(always)]
89 pub fn chars(&self) -> Bitmap {
90 self.chars
91 }
92
93 pub fn tabs(&self) -> Bitmap {
94 self.tabs
95 }
96
97 #[inline(always)]
98 pub fn is_char_boundary(&self, offset: usize) -> bool {
99 (1 as Bitmap).unbounded_shl(offset as u32) & self.chars != 0 || offset == self.text.len()
100 }
101
102 pub fn floor_char_boundary(&self, index: usize) -> usize {
103 #[inline]
104 pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
105 // This is bit magic equivalent to: b < 128 || b >= 192
106 (u8 as i8) >= -0x40
107 }
108
109 if index >= self.text.len() {
110 self.text.len()
111 } else {
112 let mut i = index;
113 while i > 0 {
114 if is_utf8_char_boundary(self.text.as_bytes()[i]) {
115 break;
116 }
117 i -= 1;
118 }
119
120 i
121 }
122 }
123
124 #[track_caller]
125 #[inline(always)]
126 pub fn assert_char_boundary(&self, offset: usize) {
127 if self.is_char_boundary(offset) {
128 return;
129 }
130 panic_char_boundary(self, offset);
131
132 #[cold]
133 #[inline(never)]
134 #[track_caller]
135 fn panic_char_boundary(chunk: &Chunk, offset: usize) {
136 if offset > chunk.text.len() {
137 panic!(
138 "byte index {} is out of bounds of `{:?}` (length: {})",
139 offset,
140 chunk.text,
141 chunk.text.len()
142 );
143 }
144 // find the character
145 let char_start = chunk.floor_char_boundary(offset);
146 // `char_start` must be less than len and a char boundary
147 let ch = chunk
148 .text
149 .get(char_start..)
150 .unwrap()
151 .chars()
152 .next()
153 .unwrap();
154 let char_range = char_start..char_start + ch.len_utf8();
155 panic!(
156 "byte index {} is not a char boundary; it is inside {:?} (bytes {:?})",
157 offset, ch, char_range,
158 );
159 }
160 }
161}
162
163#[derive(Clone, Copy, Debug)]
164pub struct ChunkSlice<'a> {
165 chars: Bitmap,
166 chars_utf16: Bitmap,
167 newlines: Bitmap,
168 tabs: Bitmap,
169 text: &'a str,
170}
171
172impl Into<Chunk> for ChunkSlice<'_> {
173 fn into(self) -> Chunk {
174 Chunk {
175 chars: self.chars,
176 chars_utf16: self.chars_utf16,
177 newlines: self.newlines,
178 tabs: self.tabs,
179 text: self.text.try_into().unwrap(),
180 }
181 }
182}
183
184impl<'a> ChunkSlice<'a> {
185 #[inline(always)]
186 pub fn is_empty(&self) -> bool {
187 self.text.is_empty()
188 }
189
190 #[inline(always)]
191 pub fn is_char_boundary(self, offset: usize) -> bool {
192 self.text.is_char_boundary(offset)
193 }
194
195 #[inline(always)]
196 pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
197 if mid == MAX_BASE {
198 let left = self;
199 let right = ChunkSlice {
200 chars: 0,
201 chars_utf16: 0,
202 newlines: 0,
203 tabs: 0,
204 text: "",
205 };
206 (left, right)
207 } else {
208 let mask = ((1 as Bitmap) << mid) - 1;
209 let (left_text, right_text) = self.text.split_at(mid);
210 let left = ChunkSlice {
211 chars: self.chars & mask,
212 chars_utf16: self.chars_utf16 & mask,
213 newlines: self.newlines & mask,
214 tabs: self.tabs & mask,
215 text: left_text,
216 };
217 let right = ChunkSlice {
218 chars: self.chars >> mid,
219 chars_utf16: self.chars_utf16 >> mid,
220 newlines: self.newlines >> mid,
221 tabs: self.tabs >> mid,
222 text: right_text,
223 };
224 (left, right)
225 }
226 }
227
228 #[inline(always)]
229 pub fn slice(self, range: Range<usize>) -> Self {
230 debug_assert!(
231 self.is_char_boundary(range.end),
232 "Invalid range end {} in {:?}",
233 range.end,
234 self
235 );
236 let mask = (1 as Bitmap)
237 .unbounded_shl(range.end as u32)
238 .wrapping_sub(1);
239 if range.start == MAX_BASE {
240 Self {
241 chars: 0,
242 chars_utf16: 0,
243 newlines: 0,
244 tabs: 0,
245 text: "",
246 }
247 } else {
248 debug_assert!(
249 self.is_char_boundary(range.start),
250 "Invalid range start {} in {:?}",
251 range.start,
252 self
253 );
254 Self {
255 chars: (self.chars & mask) >> range.start,
256 chars_utf16: (self.chars_utf16 & mask) >> range.start,
257 newlines: (self.newlines & mask) >> range.start,
258 tabs: (self.tabs & mask) >> range.start,
259 text: &self.text[range],
260 }
261 }
262 }
263
264 #[inline(always)]
265 pub fn text_summary(&self) -> TextSummary {
266 let mut chars = 0;
267 let (longest_row, longest_row_chars) = self.longest_row(&mut chars);
268 TextSummary {
269 len: self.len(),
270 chars,
271 len_utf16: self.len_utf16(),
272 lines: self.lines(),
273 first_line_chars: self.first_line_chars(),
274 last_line_chars: self.last_line_chars(),
275 last_line_len_utf16: self.last_line_len_utf16(),
276 longest_row,
277 longest_row_chars,
278 }
279 }
280
281 /// Get length in bytes
282 #[inline(always)]
283 pub fn len(&self) -> usize {
284 self.text.len()
285 }
286
287 /// Get length in UTF-16 code units
288 #[inline(always)]
289 pub fn len_utf16(&self) -> OffsetUtf16 {
290 OffsetUtf16(self.chars_utf16.count_ones() as usize)
291 }
292
293 /// Get point representing number of lines and length of last line
294 #[inline(always)]
295 pub fn lines(&self) -> Point {
296 let row = self.newlines.count_ones();
297 let column = self.newlines.leading_zeros() - (Bitmap::BITS - self.text.len() as u32);
298 Point::new(row, column)
299 }
300
301 /// Get number of chars in first line
302 #[inline(always)]
303 pub fn first_line_chars(&self) -> u32 {
304 if self.newlines == 0 {
305 self.chars.count_ones()
306 } else {
307 let mask = ((1 as Bitmap) << self.newlines.trailing_zeros()) - 1;
308 (self.chars & mask).count_ones()
309 }
310 }
311
312 /// Get number of chars in last line
313 #[inline(always)]
314 pub fn last_line_chars(&self) -> u32 {
315 if self.newlines == 0 {
316 self.chars.count_ones()
317 } else {
318 let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
319 (self.chars & mask).count_ones()
320 }
321 }
322
323 /// Get number of UTF-16 code units in last line
324 #[inline(always)]
325 pub fn last_line_len_utf16(&self) -> u32 {
326 if self.newlines == 0 {
327 self.chars_utf16.count_ones()
328 } else {
329 let mask = !(Bitmap::MAX >> self.newlines.leading_zeros());
330 (self.chars_utf16 & mask).count_ones()
331 }
332 }
333
334 /// Get the longest row in the chunk and its length in characters.
335 /// Calculate the total number of characters in the chunk along the way.
336 #[inline(always)]
337 pub fn longest_row(&self, total_chars: &mut usize) -> (u32, u32) {
338 let mut chars = self.chars;
339 let mut newlines = self.newlines;
340 *total_chars = 0;
341 let mut row = 0;
342 let mut longest_row = 0;
343 let mut longest_row_chars = 0;
344 while newlines > 0 {
345 let newline_ix = newlines.trailing_zeros();
346 let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
347 *total_chars += usize::from(row_chars);
348 if row_chars > longest_row_chars {
349 longest_row = row;
350 longest_row_chars = row_chars;
351 }
352
353 newlines >>= newline_ix;
354 newlines >>= 1;
355 chars >>= newline_ix;
356 chars >>= 1;
357 row += 1;
358 *total_chars += 1;
359 }
360
361 let row_chars = chars.count_ones() as u8;
362 *total_chars += usize::from(row_chars);
363 if row_chars > longest_row_chars {
364 (row, row_chars as u32)
365 } else {
366 (longest_row, longest_row_chars as u32)
367 }
368 }
369
370 #[inline(always)]
371 pub fn offset_to_point(&self, offset: usize) -> Point {
372 let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
373 let row = (self.newlines & mask).count_ones();
374 let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
375 let column = (offset - newline_ix as usize) as u32;
376 Point::new(row, column)
377 }
378
379 #[inline(always)]
380 pub fn point_to_offset(&self, point: Point) -> usize {
381 if point.row > self.lines().row {
382 debug_panic!(
383 "point {:?} extends beyond rows for string {:?}",
384 point,
385 self.text
386 );
387 return self.len();
388 }
389
390 let row_offset_range = self.offset_range_for_row(point.row);
391 if point.column > row_offset_range.len() as u32 {
392 debug_panic!(
393 "point {:?} extends beyond row for string {:?}",
394 point,
395 self.text
396 );
397 row_offset_range.end
398 } else {
399 row_offset_range.start + point.column as usize
400 }
401 }
402
403 #[inline(always)]
404 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
405 let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
406 OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
407 }
408
409 #[inline(always)]
410 pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
411 if target.0 == 0 {
412 0
413 } else {
414 #[cfg(not(test))]
415 let chars_utf16 = self.chars_utf16;
416 #[cfg(test)]
417 let chars_utf16 = self.chars_utf16 as u128;
418 let ix = nth_set_bit(chars_utf16, target.0) + 1;
419 if ix == MAX_BASE {
420 MAX_BASE
421 } else {
422 let utf8_additional_len = cmp::min(
423 (self.chars_utf16 >> ix).trailing_zeros() as usize,
424 self.text.len() - ix,
425 );
426 ix + utf8_additional_len
427 }
428 }
429 }
430
431 #[inline(always)]
432 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
433 let mask = (1 as Bitmap).unbounded_shl(offset as u32).wrapping_sub(1);
434 let row = (self.newlines & mask).count_ones();
435 let newline_ix = Bitmap::BITS - (self.newlines & mask).leading_zeros();
436 let column = if newline_ix as usize == MAX_BASE {
437 0
438 } else {
439 ((self.chars_utf16 & mask) >> newline_ix).count_ones()
440 };
441 PointUtf16::new(row, column)
442 }
443
444 #[inline(always)]
445 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
446 self.offset_to_point_utf16(self.point_to_offset(point))
447 }
448
449 #[inline(always)]
450 pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
451 let lines = self.lines();
452 if point.row > lines.row {
453 if !clip {
454 debug_panic!(
455 "point {:?} is beyond this chunk's extent {:?}",
456 point,
457 self.text
458 );
459 }
460 return self.len();
461 }
462
463 let row_offset_range = self.offset_range_for_row(point.row);
464 let line = self.slice(row_offset_range.clone());
465 if point.column > line.last_line_len_utf16() {
466 if !clip {
467 debug_panic!(
468 "point {:?} is beyond the end of the line in chunk {:?}",
469 point,
470 self.text
471 );
472 }
473 return line.len();
474 }
475
476 let mut offset = row_offset_range.start;
477 if point.column > 0 {
478 offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
479 if !self.text.is_char_boundary(offset) {
480 offset -= 1;
481 while !self.text.is_char_boundary(offset) {
482 offset -= 1;
483 }
484 if !clip {
485 debug_panic!(
486 "point {:?} is within character in chunk {:?}",
487 point,
488 self.text,
489 );
490 }
491 }
492 }
493 offset
494 }
495
496 #[inline(always)]
497 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
498 let max_point = self.lines();
499 if point.0.row > max_point.row {
500 return max_point;
501 }
502
503 let row_offset_range = self.offset_range_for_row(point.0.row);
504 let line = self.slice(row_offset_range);
505 if point.0.column == 0 {
506 Point::new(point.0.row, 0)
507 } else if point.0.column >= line.len_utf16().0 as u32 {
508 Point::new(point.0.row, line.len() as u32)
509 } else {
510 let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
511 while !line.text.is_char_boundary(column) {
512 column -= 1;
513 }
514 Point::new(point.0.row, column as u32)
515 }
516 }
517
518 #[inline(always)]
519 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
520 let max_point = self.lines();
521 if point.row > max_point.row {
522 return max_point;
523 }
524
525 let line = self.slice(self.offset_range_for_row(point.row));
526 if point.column == 0 {
527 point
528 } else if point.column >= line.len() as u32 {
529 Point::new(point.row, line.len() as u32)
530 } else {
531 let mut column = point.column as usize;
532 let bytes = line.text.as_bytes();
533 if bytes[column - 1] < 128 && bytes[column] < 128 {
534 return Point::new(point.row, column as u32);
535 }
536
537 let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
538 loop {
539 if line.is_char_boundary(column)
540 && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
541 {
542 break;
543 }
544
545 match bias {
546 Bias::Left => column -= 1,
547 Bias::Right => column += 1,
548 }
549 grapheme_cursor.set_cursor(column);
550 }
551 Point::new(point.row, column as u32)
552 }
553 }
554
555 #[inline(always)]
556 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
557 let max_point = self.lines();
558 if point.0.row > max_point.row {
559 PointUtf16::new(max_point.row, self.last_line_len_utf16())
560 } else {
561 let line = self.slice(self.offset_range_for_row(point.0.row));
562 let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
563 PointUtf16::new(point.0.row, column.0 as u32)
564 }
565 }
566
567 #[inline(always)]
568 pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
569 if target == OffsetUtf16::default() {
570 OffsetUtf16::default()
571 } else if target >= self.len_utf16() {
572 self.len_utf16()
573 } else {
574 let mut offset = self.offset_utf16_to_offset(target);
575 while !self.text.is_char_boundary(offset) {
576 if bias == Bias::Left {
577 offset -= 1;
578 } else {
579 offset += 1;
580 }
581 }
582 self.offset_to_offset_utf16(offset)
583 }
584 }
585
586 #[inline(always)]
587 fn offset_range_for_row(&self, row: u32) -> Range<usize> {
588 let row_start = if row > 0 {
589 #[cfg(not(test))]
590 let newlines = self.newlines;
591 #[cfg(test)]
592 let newlines = self.newlines as u128;
593 nth_set_bit(newlines, row as usize) + 1
594 } else {
595 0
596 };
597 let row_len = if row_start == MAX_BASE {
598 0
599 } else {
600 cmp::min(
601 (self.newlines >> row_start).trailing_zeros(),
602 (self.text.len() - row_start) as u32,
603 )
604 };
605 row_start..row_start + row_len as usize
606 }
607
608 #[inline(always)]
609 pub fn tabs(&self) -> Tabs {
610 Tabs {
611 tabs: self.tabs,
612 chars: self.chars,
613 }
614 }
615}
616
617pub struct Tabs {
618 tabs: Bitmap,
619 chars: Bitmap,
620}
621
622#[derive(Debug, PartialEq, Eq)]
623pub struct TabPosition {
624 pub byte_offset: usize,
625 pub char_offset: usize,
626}
627
628impl Iterator for Tabs {
629 type Item = TabPosition;
630
631 fn next(&mut self) -> Option<Self::Item> {
632 if self.tabs == 0 {
633 return None;
634 }
635
636 let tab_offset = self.tabs.trailing_zeros() as usize;
637 let chars_mask = (1 << tab_offset) - 1;
638 let char_offset = (self.chars & chars_mask).count_ones() as usize;
639
640 // Since tabs are 1 byte the tab offset is the same as the byte offset
641 let position = TabPosition {
642 byte_offset: tab_offset,
643 char_offset,
644 };
645 // Remove the tab we've just seen
646 self.tabs ^= 1 << tab_offset;
647
648 Some(position)
649 }
650}
651
652/// Finds the n-th bit that is set to 1.
653#[inline(always)]
654fn nth_set_bit(v: u128, n: usize) -> usize {
655 let low = v as u64;
656 let high = (v >> 64) as u64;
657
658 let low_count = low.count_ones() as usize;
659 if n > low_count {
660 64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
661 } else {
662 nth_set_bit_u64(low, n as u64) as usize
663 }
664}
665
666#[inline(always)]
667fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
668 let v = v.reverse_bits();
669 let mut s: u64 = 64;
670
671 // Parallel bit count intermediates
672 let a = v - ((v >> 1) & (u64::MAX / 3));
673 let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
674 let c = (b + (b >> 4)) & (u64::MAX / 0x11);
675 let d = (c + (c >> 8)) & (u64::MAX / 0x101);
676
677 // Branchless select
678 let t = (d >> 32) + (d >> 48);
679 s -= (t.wrapping_sub(n) & 256) >> 3;
680 n -= t & (t.wrapping_sub(n) >> 8);
681
682 let t = (d >> (s - 16)) & 0xff;
683 s -= (t.wrapping_sub(n) & 256) >> 4;
684 n -= t & (t.wrapping_sub(n) >> 8);
685
686 let t = (c >> (s - 8)) & 0xf;
687 s -= (t.wrapping_sub(n) & 256) >> 5;
688 n -= t & (t.wrapping_sub(n) >> 8);
689
690 let t = (b >> (s - 4)) & 0x7;
691 s -= (t.wrapping_sub(n) & 256) >> 6;
692 n -= t & (t.wrapping_sub(n) >> 8);
693
694 let t = (a >> (s - 2)) & 0x3;
695 s -= (t.wrapping_sub(n) & 256) >> 7;
696 n -= t & (t.wrapping_sub(n) >> 8);
697
698 let t = (v >> (s - 1)) & 0x1;
699 s -= (t.wrapping_sub(n) & 256) >> 8;
700
701 65 - s - 1
702}
703
704#[cfg(test)]
705mod tests {
706 use super::*;
707 use rand::prelude::*;
708 use util::RandomCharIter;
709
710 #[gpui::test(iterations = 100)]
711 fn test_random_chunks(mut rng: StdRng) {
712 let text = random_string_with_utf8_len(&mut rng, MAX_BASE);
713 log::info!("Chunk: {:?}", text);
714 let chunk = Chunk::new(&text);
715 verify_chunk(chunk.as_slice(), &text);
716
717 // Verify Chunk::chars() bitmap
718 let expected_chars = char_offsets(&text)
719 .into_iter()
720 .inspect(|i| assert!(*i < MAX_BASE))
721 .fold(0 as Bitmap, |acc, i| acc | (1 << i));
722 assert_eq!(chunk.chars(), expected_chars);
723
724 for _ in 0..10 {
725 let mut start = rng.random_range(0..=chunk.text.len());
726 let mut end = rng.random_range(start..=chunk.text.len());
727 while !chunk.text.is_char_boundary(start) {
728 start -= 1;
729 }
730 while !chunk.text.is_char_boundary(end) {
731 end -= 1;
732 }
733 let range = start..end;
734 log::info!("Range: {:?}", range);
735 let text_slice = &text[range.clone()];
736 let chunk_slice = chunk.slice(range);
737 verify_chunk(chunk_slice, text_slice);
738 }
739 }
740
741 #[gpui::test(iterations = 100)]
742 fn test_split_chunk_slice(mut rng: StdRng) {
743 let text = &random_string_with_utf8_len(&mut rng, MAX_BASE);
744 let chunk = Chunk::new(text);
745 let offset = char_offsets_with_end(text)
746 .into_iter()
747 .choose(&mut rng)
748 .unwrap();
749 let (a, b) = chunk.as_slice().split_at(offset);
750 let (a_str, b_str) = text.split_at(offset);
751 verify_chunk(a, a_str);
752 verify_chunk(b, b_str);
753 }
754
755 #[gpui::test(iterations = 1000)]
756 fn test_nth_set_bit_random(mut rng: StdRng) {
757 let set_count = rng.random_range(0..=128);
758 let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
759 set_bits.sort();
760 let mut n = 0;
761 for ix in set_bits.iter().copied() {
762 n |= 1 << ix;
763 }
764
765 for (mut ix, position) in set_bits.into_iter().enumerate() {
766 ix += 1;
767 assert_eq!(
768 nth_set_bit(n, ix),
769 position,
770 "nth_set_bit({:0128b}, {})",
771 n,
772 ix
773 );
774 }
775 }
776
777 /// Returns a (biased) random string whose UTF-8 length is no more than `len`.
778 fn random_string_with_utf8_len(rng: &mut StdRng, len: usize) -> String {
779 let mut str = String::new();
780 let mut chars = RandomCharIter::new(rng);
781 loop {
782 let ch = chars.next().unwrap();
783 if str.len() + ch.len_utf8() > len {
784 break;
785 }
786 str.push(ch);
787 }
788 str
789 }
790
791 #[gpui::test(iterations = 1000)]
792 fn test_append_random_strings(mut rng: StdRng) {
793 let len1 = rng.random_range(0..=MAX_BASE);
794 let len2 = rng.random_range(0..=MAX_BASE).saturating_sub(len1);
795 let str1 = random_string_with_utf8_len(&mut rng, len1);
796 let str2 = random_string_with_utf8_len(&mut rng, len2);
797 let mut chunk1 = Chunk::new(&str1);
798 let chunk2 = Chunk::new(&str2);
799 let char_offsets = char_offsets_with_end(&str2);
800 let start_index = rng.random_range(0..char_offsets.len());
801 let start_offset = char_offsets[start_index];
802 let end_offset = char_offsets[rng.random_range(start_index..char_offsets.len())];
803 chunk1.append(chunk2.slice(start_offset..end_offset));
804 verify_chunk(chunk1.as_slice(), &(str1 + &str2[start_offset..end_offset]));
805 }
806
807 /// Return the byte offsets for each character in a string.
808 ///
809 /// These are valid offsets to split the string.
810 fn char_offsets(text: &str) -> Vec<usize> {
811 text.char_indices().map(|(i, _c)| i).collect()
812 }
813
814 /// Return the byte offsets for each character in a string, plus the offset
815 /// past the end of the string.
816 fn char_offsets_with_end(text: &str) -> Vec<usize> {
817 let mut v = char_offsets(text);
818 v.push(text.len());
819 v
820 }
821
822 fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
823 let mut offset = 0;
824 let mut offset_utf16 = OffsetUtf16(0);
825 let mut point = Point::zero();
826 let mut point_utf16 = PointUtf16::zero();
827
828 log::info!("Verifying chunk {:?}", text);
829 assert_eq!(chunk.offset_to_point(0), Point::zero());
830
831 let mut expected_tab_positions = Vec::new();
832
833 for (char_offset, c) in text.chars().enumerate() {
834 let expected_point = chunk.offset_to_point(offset);
835 assert_eq!(point, expected_point, "mismatch at offset {}", offset);
836 assert_eq!(
837 chunk.point_to_offset(point),
838 offset,
839 "mismatch at point {:?}",
840 point
841 );
842 assert_eq!(
843 chunk.offset_to_offset_utf16(offset),
844 offset_utf16,
845 "mismatch at offset {}",
846 offset
847 );
848 assert_eq!(
849 chunk.offset_utf16_to_offset(offset_utf16),
850 offset,
851 "mismatch at offset_utf16 {:?}",
852 offset_utf16
853 );
854 assert_eq!(
855 chunk.point_to_point_utf16(point),
856 point_utf16,
857 "mismatch at point {:?}",
858 point
859 );
860 assert_eq!(
861 chunk.point_utf16_to_offset(point_utf16, false),
862 offset,
863 "mismatch at point_utf16 {:?}",
864 point_utf16
865 );
866 assert_eq!(
867 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
868 point,
869 "mismatch for unclipped_point_utf16_to_point at {:?}",
870 point_utf16
871 );
872
873 assert_eq!(
874 chunk.clip_point(point, Bias::Left),
875 point,
876 "incorrect left clip at {:?}",
877 point
878 );
879 assert_eq!(
880 chunk.clip_point(point, Bias::Right),
881 point,
882 "incorrect right clip at {:?}",
883 point
884 );
885
886 for i in 1..c.len_utf8() {
887 let test_point = Point::new(point.row, point.column + i as u32);
888 assert_eq!(
889 chunk.clip_point(test_point, Bias::Left),
890 point,
891 "incorrect left clip within multi-byte char at {:?}",
892 test_point
893 );
894 assert_eq!(
895 chunk.clip_point(test_point, Bias::Right),
896 Point::new(point.row, point.column + c.len_utf8() as u32),
897 "incorrect right clip within multi-byte char at {:?}",
898 test_point
899 );
900 }
901
902 for i in 1..c.len_utf16() {
903 let test_point = Unclipped(PointUtf16::new(
904 point_utf16.row,
905 point_utf16.column + i as u32,
906 ));
907 assert_eq!(
908 chunk.unclipped_point_utf16_to_point(test_point),
909 point,
910 "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
911 test_point
912 );
913 assert_eq!(
914 chunk.clip_point_utf16(test_point, Bias::Left),
915 point_utf16,
916 "incorrect left clip_point_utf16 within multi-byte char at {:?}",
917 test_point
918 );
919 assert_eq!(
920 chunk.clip_point_utf16(test_point, Bias::Right),
921 PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
922 "incorrect right clip_point_utf16 within multi-byte char at {:?}",
923 test_point
924 );
925
926 let test_offset = OffsetUtf16(offset_utf16.0 + i);
927 assert_eq!(
928 chunk.clip_offset_utf16(test_offset, Bias::Left),
929 offset_utf16,
930 "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
931 test_offset
932 );
933 assert_eq!(
934 chunk.clip_offset_utf16(test_offset, Bias::Right),
935 OffsetUtf16(offset_utf16.0 + c.len_utf16()),
936 "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
937 test_offset
938 );
939 }
940
941 if c == '\n' {
942 point.row += 1;
943 point.column = 0;
944 point_utf16.row += 1;
945 point_utf16.column = 0;
946 } else {
947 point.column += c.len_utf8() as u32;
948 point_utf16.column += c.len_utf16() as u32;
949 }
950
951 if c == '\t' {
952 expected_tab_positions.push(TabPosition {
953 byte_offset: offset,
954 char_offset,
955 });
956 }
957
958 offset += c.len_utf8();
959 offset_utf16.0 += c.len_utf16();
960 }
961
962 let final_point = chunk.offset_to_point(offset);
963 assert_eq!(point, final_point, "mismatch at final offset {}", offset);
964 assert_eq!(
965 chunk.point_to_offset(point),
966 offset,
967 "mismatch at point {:?}",
968 point
969 );
970 assert_eq!(
971 chunk.offset_to_offset_utf16(offset),
972 offset_utf16,
973 "mismatch at offset {}",
974 offset
975 );
976 assert_eq!(
977 chunk.offset_utf16_to_offset(offset_utf16),
978 offset,
979 "mismatch at offset_utf16 {:?}",
980 offset_utf16
981 );
982 assert_eq!(
983 chunk.point_to_point_utf16(point),
984 point_utf16,
985 "mismatch at final point {:?}",
986 point
987 );
988 assert_eq!(
989 chunk.point_utf16_to_offset(point_utf16, false),
990 offset,
991 "mismatch at final point_utf16 {:?}",
992 point_utf16
993 );
994 assert_eq!(
995 chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
996 point,
997 "mismatch for unclipped_point_utf16_to_point at final point {:?}",
998 point_utf16
999 );
1000 assert_eq!(
1001 chunk.clip_point(point, Bias::Left),
1002 point,
1003 "incorrect left clip at final point {:?}",
1004 point
1005 );
1006 assert_eq!(
1007 chunk.clip_point(point, Bias::Right),
1008 point,
1009 "incorrect right clip at final point {:?}",
1010 point
1011 );
1012 assert_eq!(
1013 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
1014 point_utf16,
1015 "incorrect left clip_point_utf16 at final point {:?}",
1016 point_utf16
1017 );
1018 assert_eq!(
1019 chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
1020 point_utf16,
1021 "incorrect right clip_point_utf16 at final point {:?}",
1022 point_utf16
1023 );
1024 assert_eq!(
1025 chunk.clip_offset_utf16(offset_utf16, Bias::Left),
1026 offset_utf16,
1027 "incorrect left clip_offset_utf16 at final offset {:?}",
1028 offset_utf16
1029 );
1030 assert_eq!(
1031 chunk.clip_offset_utf16(offset_utf16, Bias::Right),
1032 offset_utf16,
1033 "incorrect right clip_offset_utf16 at final offset {:?}",
1034 offset_utf16
1035 );
1036
1037 // Verify length methods
1038 assert_eq!(chunk.len(), text.len());
1039 assert_eq!(
1040 chunk.len_utf16().0,
1041 text.chars().map(|c| c.len_utf16()).sum::<usize>()
1042 );
1043
1044 // Verify line counting
1045 let lines = chunk.lines();
1046 let mut newline_count = 0;
1047 let mut last_line_len = 0;
1048 for c in text.chars() {
1049 if c == '\n' {
1050 newline_count += 1;
1051 last_line_len = 0;
1052 } else {
1053 last_line_len += c.len_utf8() as u32;
1054 }
1055 }
1056 assert_eq!(lines, Point::new(newline_count, last_line_len));
1057
1058 // Verify first/last line chars
1059 if !text.is_empty() {
1060 let first_line = text.split('\n').next().unwrap();
1061 assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);
1062
1063 let last_line = text.split('\n').next_back().unwrap();
1064 assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
1065 assert_eq!(
1066 chunk.last_line_len_utf16(),
1067 last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
1068 );
1069 }
1070
1071 // Verify longest row
1072 let (longest_row, longest_chars) = chunk.longest_row(&mut 0);
1073 let mut max_chars = 0;
1074 let mut current_row = 0;
1075 let mut current_chars = 0;
1076 let mut max_row = 0;
1077
1078 for c in text.chars() {
1079 if c == '\n' {
1080 if current_chars > max_chars {
1081 max_chars = current_chars;
1082 max_row = current_row;
1083 }
1084 current_row += 1;
1085 current_chars = 0;
1086 } else {
1087 current_chars += 1;
1088 }
1089 }
1090
1091 if current_chars > max_chars {
1092 max_chars = current_chars;
1093 max_row = current_row;
1094 }
1095
1096 assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
1097 assert_eq!(chunk.tabs().collect::<Vec<_>>(), expected_tab_positions);
1098 }
1099}