1use super::Point;
2use crate::{OffsetUtf16, PointUtf16};
3use arrayvec::ArrayString;
4use bromberg_sl2::{DigestString, HashMatrix};
5use smallvec::SmallVec;
6use std::{cmp, fmt, io, mem, ops::Range, str};
7use sum_tree::{Bias, Dimension, SumTree};
8
9#[cfg(test)]
10const CHUNK_BASE: usize = 6;
11
12#[cfg(not(test))]
13const CHUNK_BASE: usize = 16;
14
15#[derive(Clone, Default, Debug)]
16pub struct Rope {
17 chunks: SumTree<Chunk>,
18}
19
20impl Rope {
21 pub fn new() -> Self {
22 Self::default()
23 }
24
25 pub fn append(&mut self, rope: Rope) {
26 let mut chunks = rope.chunks.cursor::<()>();
27 chunks.next(&());
28 if let Some(chunk) = chunks.item() {
29 if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
30 || chunk.0.len() < CHUNK_BASE
31 {
32 self.push(&chunk.0);
33 chunks.next(&());
34 }
35 }
36
37 self.chunks.push_tree(chunks.suffix(&()), &());
38 self.check_invariants();
39 }
40
41 pub fn replace(&mut self, range: Range<usize>, text: &str) {
42 let mut new_rope = Rope::new();
43 let mut cursor = self.cursor(0);
44 new_rope.append(cursor.slice(range.start));
45 cursor.seek_forward(range.end);
46 new_rope.push(text);
47 new_rope.append(cursor.suffix());
48 *self = new_rope;
49 }
50
51 pub fn slice(&self, range: Range<usize>) -> Rope {
52 let mut cursor = self.cursor(0);
53 cursor.seek_forward(range.start);
54 cursor.slice(range.end)
55 }
56
57 pub fn push(&mut self, text: &str) {
58 let mut new_chunks = SmallVec::<[_; 16]>::new();
59 let mut new_chunk = ArrayString::new();
60 for ch in text.chars() {
61 if new_chunk.len() + ch.len_utf8() > 2 * CHUNK_BASE {
62 new_chunks.push(Chunk(new_chunk));
63 new_chunk = ArrayString::new();
64 }
65
66 new_chunk.push(ch);
67 }
68 if !new_chunk.is_empty() {
69 new_chunks.push(Chunk(new_chunk));
70 }
71
72 let mut new_chunks = new_chunks.into_iter();
73 let mut first_new_chunk = new_chunks.next();
74 self.chunks.update_last(
75 |last_chunk| {
76 if let Some(first_new_chunk_ref) = first_new_chunk.as_mut() {
77 if last_chunk.0.len() + first_new_chunk_ref.0.len() <= 2 * CHUNK_BASE {
78 last_chunk.0.push_str(&first_new_chunk.take().unwrap().0);
79 } else {
80 let mut text = ArrayString::<{ 4 * CHUNK_BASE }>::new();
81 text.push_str(&last_chunk.0);
82 text.push_str(&first_new_chunk_ref.0);
83 let (left, right) = text.split_at(find_split_ix(&text));
84 last_chunk.0.clear();
85 last_chunk.0.push_str(left);
86 first_new_chunk_ref.0.clear();
87 first_new_chunk_ref.0.push_str(right);
88 }
89 }
90 },
91 &(),
92 );
93
94 self.chunks
95 .extend(first_new_chunk.into_iter().chain(new_chunks), &());
96 self.check_invariants();
97 }
98
99 pub fn push_front(&mut self, text: &str) {
100 let suffix = mem::replace(self, Rope::from(text));
101 self.append(suffix);
102 }
103
104 fn check_invariants(&self) {
105 #[cfg(test)]
106 {
107 // Ensure all chunks except maybe the last one are not underflowing.
108 // Allow some wiggle room for multibyte characters at chunk boundaries.
109 let mut chunks = self.chunks.cursor::<()>().peekable();
110 while let Some(chunk) = chunks.next() {
111 if chunks.peek().is_some() {
112 assert!(chunk.0.len() + 3 >= CHUNK_BASE);
113 }
114 }
115 }
116 }
117
118 pub fn summary(&self) -> TextSummary {
119 self.chunks.summary().text.clone()
120 }
121
122 pub fn len(&self) -> usize {
123 self.chunks.extent(&())
124 }
125
126 pub fn is_empty(&self) -> bool {
127 self.len() == 0
128 }
129
130 pub fn max_point(&self) -> Point {
131 self.chunks.extent(&())
132 }
133
134 pub fn max_point_utf16(&self) -> PointUtf16 {
135 self.chunks.extent(&())
136 }
137
138 pub fn cursor(&self, offset: usize) -> Cursor {
139 Cursor::new(self, offset)
140 }
141
142 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
143 self.chars_at(0)
144 }
145
146 pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
147 self.chunks_in_range(start..self.len()).flat_map(str::chars)
148 }
149
150 pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
151 self.reversed_chunks_in_range(0..start)
152 .flat_map(|chunk| chunk.chars().rev())
153 }
154
155 pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
156 Bytes::new(self, range)
157 }
158
159 pub fn chunks(&self) -> Chunks {
160 self.chunks_in_range(0..self.len())
161 }
162
163 pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
164 Chunks::new(self, range, false)
165 }
166
167 pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
168 Chunks::new(self, range, true)
169 }
170
171 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
172 if offset >= self.summary().len {
173 return self.summary().len_utf16;
174 }
175 let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
176 cursor.seek(&offset, Bias::Left, &());
177 let overshoot = offset - cursor.start().0;
178 cursor.start().1
179 + cursor.item().map_or(Default::default(), |chunk| {
180 chunk.offset_to_offset_utf16(overshoot)
181 })
182 }
183
184 pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
185 if offset >= self.summary().len_utf16 {
186 return self.summary().len;
187 }
188 let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
189 cursor.seek(&offset, Bias::Left, &());
190 let overshoot = offset - cursor.start().0;
191 cursor.start().1
192 + cursor.item().map_or(Default::default(), |chunk| {
193 chunk.offset_utf16_to_offset(overshoot)
194 })
195 }
196
197 pub fn offset_to_point(&self, offset: usize) -> Point {
198 if offset >= self.summary().len {
199 return self.summary().lines;
200 }
201 let mut cursor = self.chunks.cursor::<(usize, Point)>();
202 cursor.seek(&offset, Bias::Left, &());
203 let overshoot = offset - cursor.start().0;
204 cursor.start().1
205 + cursor
206 .item()
207 .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
208 }
209
210 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
211 if offset >= self.summary().len {
212 return self.summary().lines_utf16();
213 }
214 let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
215 cursor.seek(&offset, Bias::Left, &());
216 let overshoot = offset - cursor.start().0;
217 cursor.start().1
218 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
219 chunk.offset_to_point_utf16(overshoot)
220 })
221 }
222
223 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
224 if point >= self.summary().lines {
225 return self.summary().lines_utf16();
226 }
227 let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
228 cursor.seek(&point, Bias::Left, &());
229 let overshoot = point - cursor.start().0;
230 cursor.start().1
231 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
232 chunk.point_to_point_utf16(overshoot)
233 })
234 }
235
236 pub fn point_to_offset(&self, point: Point) -> usize {
237 if point >= self.summary().lines {
238 return self.summary().len;
239 }
240 let mut cursor = self.chunks.cursor::<(Point, usize)>();
241 cursor.seek(&point, Bias::Left, &());
242 let overshoot = point - cursor.start().0;
243 cursor.start().1
244 + cursor
245 .item()
246 .map_or(0, |chunk| chunk.point_to_offset(overshoot))
247 }
248
249 pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
250 if point >= self.summary().lines_utf16() {
251 return self.summary().len;
252 }
253 let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
254 cursor.seek(&point, Bias::Left, &());
255 let overshoot = point - cursor.start().0;
256 cursor.start().1
257 + cursor
258 .item()
259 .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot))
260 }
261
262 pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
263 if point >= self.summary().lines_utf16() {
264 return self.summary().lines;
265 }
266 let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
267 cursor.seek(&point, Bias::Left, &());
268 let overshoot = point - cursor.start().0;
269 cursor.start().1
270 + cursor
271 .item()
272 .map_or(Point::zero(), |chunk| chunk.point_utf16_to_point(overshoot))
273 }
274
275 pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
276 let mut cursor = self.chunks.cursor::<usize>();
277 cursor.seek(&offset, Bias::Left, &());
278 if let Some(chunk) = cursor.item() {
279 let mut ix = offset - cursor.start();
280 while !chunk.0.is_char_boundary(ix) {
281 match bias {
282 Bias::Left => {
283 ix -= 1;
284 offset -= 1;
285 }
286 Bias::Right => {
287 ix += 1;
288 offset += 1;
289 }
290 }
291 }
292 offset
293 } else {
294 self.summary().len
295 }
296 }
297
298 pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
299 let mut cursor = self.chunks.cursor::<OffsetUtf16>();
300 cursor.seek(&offset, Bias::Right, &());
301 if let Some(chunk) = cursor.item() {
302 let overshoot = offset - cursor.start();
303 *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
304 } else {
305 self.summary().len_utf16
306 }
307 }
308
309 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
310 let mut cursor = self.chunks.cursor::<Point>();
311 cursor.seek(&point, Bias::Right, &());
312 if let Some(chunk) = cursor.item() {
313 let overshoot = point - cursor.start();
314 *cursor.start() + chunk.clip_point(overshoot, bias)
315 } else {
316 self.summary().lines
317 }
318 }
319
320 pub fn clip_point_utf16(&self, point: PointUtf16, bias: Bias) -> PointUtf16 {
321 let mut cursor = self.chunks.cursor::<PointUtf16>();
322 cursor.seek(&point, Bias::Right, &());
323 if let Some(chunk) = cursor.item() {
324 let overshoot = point - cursor.start();
325 *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
326 } else {
327 self.summary().lines_utf16()
328 }
329 }
330
331 pub fn line_len(&self, row: u32) -> u32 {
332 self.clip_point(Point::new(row, u32::MAX), Bias::Left)
333 .column
334 }
335
336 pub fn fingerprint(&self) -> String {
337 self.chunks.summary().fingerprint.to_hex()
338 }
339}
340
341impl<'a> From<&'a str> for Rope {
342 fn from(text: &'a str) -> Self {
343 let mut rope = Self::new();
344 rope.push(text);
345 rope
346 }
347}
348
349impl fmt::Display for Rope {
350 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
351 for chunk in self.chunks() {
352 write!(f, "{}", chunk)?;
353 }
354 Ok(())
355 }
356}
357
358pub struct Cursor<'a> {
359 rope: &'a Rope,
360 chunks: sum_tree::Cursor<'a, Chunk, usize>,
361 offset: usize,
362}
363
364impl<'a> Cursor<'a> {
365 pub fn new(rope: &'a Rope, offset: usize) -> Self {
366 let mut chunks = rope.chunks.cursor();
367 chunks.seek(&offset, Bias::Right, &());
368 Self {
369 rope,
370 chunks,
371 offset,
372 }
373 }
374
375 pub fn seek_forward(&mut self, end_offset: usize) {
376 debug_assert!(end_offset >= self.offset);
377
378 self.chunks.seek_forward(&end_offset, Bias::Right, &());
379 self.offset = end_offset;
380 }
381
382 pub fn slice(&mut self, end_offset: usize) -> Rope {
383 debug_assert!(
384 end_offset >= self.offset,
385 "cannot slice backwards from {} to {}",
386 self.offset,
387 end_offset
388 );
389
390 let mut slice = Rope::new();
391 if let Some(start_chunk) = self.chunks.item() {
392 let start_ix = self.offset - self.chunks.start();
393 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
394 slice.push(&start_chunk.0[start_ix..end_ix]);
395 }
396
397 if end_offset > self.chunks.end(&()) {
398 self.chunks.next(&());
399 slice.append(Rope {
400 chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
401 });
402 if let Some(end_chunk) = self.chunks.item() {
403 let end_ix = end_offset - self.chunks.start();
404 slice.push(&end_chunk.0[..end_ix]);
405 }
406 }
407
408 self.offset = end_offset;
409 slice
410 }
411
412 pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
413 debug_assert!(end_offset >= self.offset);
414
415 let mut summary = D::default();
416 if let Some(start_chunk) = self.chunks.item() {
417 let start_ix = self.offset - self.chunks.start();
418 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
419 summary.add_assign(&D::from_text_summary(&TextSummary::from(
420 &start_chunk.0[start_ix..end_ix],
421 )));
422 }
423
424 if end_offset > self.chunks.end(&()) {
425 self.chunks.next(&());
426 summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
427 if let Some(end_chunk) = self.chunks.item() {
428 let end_ix = end_offset - self.chunks.start();
429 summary.add_assign(&D::from_text_summary(&TextSummary::from(
430 &end_chunk.0[..end_ix],
431 )));
432 }
433 }
434
435 self.offset = end_offset;
436 summary
437 }
438
439 pub fn suffix(mut self) -> Rope {
440 self.slice(self.rope.chunks.extent(&()))
441 }
442
443 pub fn offset(&self) -> usize {
444 self.offset
445 }
446}
447
448pub struct Chunks<'a> {
449 chunks: sum_tree::Cursor<'a, Chunk, usize>,
450 range: Range<usize>,
451 reversed: bool,
452}
453
454impl<'a> Chunks<'a> {
455 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
456 let mut chunks = rope.chunks.cursor();
457 if reversed {
458 chunks.seek(&range.end, Bias::Left, &());
459 } else {
460 chunks.seek(&range.start, Bias::Right, &());
461 }
462 Self {
463 chunks,
464 range,
465 reversed,
466 }
467 }
468
469 pub fn offset(&self) -> usize {
470 if self.reversed {
471 self.range.end.min(self.chunks.end(&()))
472 } else {
473 self.range.start.max(*self.chunks.start())
474 }
475 }
476
477 pub fn seek(&mut self, offset: usize) {
478 let bias = if self.reversed {
479 Bias::Left
480 } else {
481 Bias::Right
482 };
483
484 if offset >= self.chunks.end(&()) {
485 self.chunks.seek_forward(&offset, bias, &());
486 } else {
487 self.chunks.seek(&offset, bias, &());
488 }
489
490 if self.reversed {
491 self.range.end = offset;
492 } else {
493 self.range.start = offset;
494 }
495 }
496
497 pub fn peek(&self) -> Option<&'a str> {
498 let chunk = self.chunks.item()?;
499 if self.reversed && self.range.start >= self.chunks.end(&()) {
500 return None;
501 }
502 let chunk_start = *self.chunks.start();
503 if self.range.end <= chunk_start {
504 return None;
505 }
506
507 let start = self.range.start.saturating_sub(chunk_start);
508 let end = self.range.end - chunk_start;
509 Some(&chunk.0[start..chunk.0.len().min(end)])
510 }
511}
512
513impl<'a> Iterator for Chunks<'a> {
514 type Item = &'a str;
515
516 fn next(&mut self) -> Option<Self::Item> {
517 let result = self.peek();
518 if result.is_some() {
519 if self.reversed {
520 self.chunks.prev(&());
521 } else {
522 self.chunks.next(&());
523 }
524 }
525 result
526 }
527}
528
529pub struct Bytes<'a> {
530 chunks: sum_tree::Cursor<'a, Chunk, usize>,
531 range: Range<usize>,
532}
533
534impl<'a> Bytes<'a> {
535 pub fn new(rope: &'a Rope, range: Range<usize>) -> Self {
536 let mut chunks = rope.chunks.cursor();
537 chunks.seek(&range.start, Bias::Right, &());
538 Self { chunks, range }
539 }
540
541 pub fn peek(&self) -> Option<&'a [u8]> {
542 let chunk = self.chunks.item()?;
543 let chunk_start = *self.chunks.start();
544 if self.range.end <= chunk_start {
545 return None;
546 }
547
548 let start = self.range.start.saturating_sub(chunk_start);
549 let end = self.range.end - chunk_start;
550 Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
551 }
552}
553
554impl<'a> Iterator for Bytes<'a> {
555 type Item = &'a [u8];
556
557 fn next(&mut self) -> Option<Self::Item> {
558 let result = self.peek();
559 if result.is_some() {
560 self.chunks.next(&());
561 }
562 result
563 }
564}
565
566impl<'a> io::Read for Bytes<'a> {
567 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
568 if let Some(chunk) = self.peek() {
569 let len = cmp::min(buf.len(), chunk.len());
570 buf[..len].copy_from_slice(&chunk[..len]);
571 self.range.start += len;
572 if len == chunk.len() {
573 self.chunks.next(&());
574 }
575 Ok(len)
576 } else {
577 Ok(0)
578 }
579 }
580}
581
582#[derive(Clone, Debug, Default)]
583struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
584
585impl Chunk {
586 fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
587 let mut offset = 0;
588 let mut offset_utf16 = OffsetUtf16(0);
589 for ch in self.0.chars() {
590 if offset >= target {
591 break;
592 }
593
594 offset += ch.len_utf8();
595 offset_utf16.0 += ch.len_utf16();
596 }
597 offset_utf16
598 }
599
600 fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
601 let mut offset_utf16 = OffsetUtf16(0);
602 let mut offset = 0;
603 for ch in self.0.chars() {
604 if offset_utf16 >= target {
605 break;
606 }
607
608 offset += ch.len_utf8();
609 offset_utf16.0 += ch.len_utf16();
610 }
611 offset
612 }
613
614 fn offset_to_point(&self, target: usize) -> Point {
615 let mut offset = 0;
616 let mut point = Point::new(0, 0);
617 for ch in self.0.chars() {
618 if offset >= target {
619 break;
620 }
621
622 if ch == '\n' {
623 point.row += 1;
624 point.column = 0;
625 } else {
626 point.column += ch.len_utf8() as u32;
627 }
628 offset += ch.len_utf8();
629 }
630 point
631 }
632
633 fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
634 let mut offset = 0;
635 let mut point = PointUtf16::new(0, 0);
636 for ch in self.0.chars() {
637 if offset >= target {
638 break;
639 }
640
641 if ch == '\n' {
642 point.row += 1;
643 point.column = 0;
644 } else {
645 point.column += ch.len_utf16() as u32;
646 }
647 offset += ch.len_utf8();
648 }
649 point
650 }
651
652 fn point_to_offset(&self, target: Point) -> usize {
653 let mut offset = 0;
654 let mut point = Point::new(0, 0);
655 for ch in self.0.chars() {
656 if point >= target {
657 if point > target {
658 panic!("point {:?} is inside of character {:?}", target, ch);
659 }
660 break;
661 }
662
663 if ch == '\n' {
664 point.row += 1;
665 if point.row > target.row {
666 panic!(
667 "point {:?} is beyond the end of a line with length {}",
668 target, point.column
669 );
670 }
671 point.column = 0;
672 } else {
673 point.column += ch.len_utf8() as u32;
674 }
675 offset += ch.len_utf8();
676 }
677 offset
678 }
679
680 fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
681 let mut point = Point::zero();
682 let mut point_utf16 = PointUtf16::new(0, 0);
683 for ch in self.0.chars() {
684 if point >= target {
685 break;
686 }
687
688 if ch == '\n' {
689 point_utf16.row += 1;
690 point_utf16.column = 0;
691 point.row += 1;
692 point.column = 0;
693 } else {
694 point_utf16.column += ch.len_utf16() as u32;
695 point.column += ch.len_utf8() as u32;
696 }
697 }
698 point_utf16
699 }
700
701 fn point_utf16_to_offset(&self, target: PointUtf16) -> usize {
702 let mut offset = 0;
703 let mut point = PointUtf16::new(0, 0);
704 for ch in self.0.chars() {
705 if point >= target {
706 if point > target {
707 panic!("point {:?} is inside of character {:?}", target, ch);
708 }
709 break;
710 }
711
712 if ch == '\n' {
713 point.row += 1;
714 if point.row > target.row {
715 panic!(
716 "point {:?} is beyond the end of a line with length {}",
717 target, point.column
718 );
719 }
720 point.column = 0;
721 } else {
722 point.column += ch.len_utf16() as u32;
723 }
724 offset += ch.len_utf8();
725 }
726 offset
727 }
728
729 fn point_utf16_to_point(&self, target: PointUtf16) -> Point {
730 let mut point = Point::zero();
731 let mut point_utf16 = PointUtf16::zero();
732 for ch in self.0.chars() {
733 if point_utf16 >= target {
734 if point_utf16 > target {
735 panic!("point {:?} is inside of character {:?}", target, ch);
736 }
737 break;
738 }
739
740 if ch == '\n' {
741 point_utf16 += PointUtf16::new(1, 0);
742 point += Point::new(1, 0);
743 } else {
744 point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
745 point += Point::new(0, ch.len_utf8() as u32);
746 }
747 }
748 point
749 }
750
751 fn clip_point(&self, target: Point, bias: Bias) -> Point {
752 for (row, line) in self.0.split('\n').enumerate() {
753 if row == target.row as usize {
754 let mut column = target.column.min(line.len() as u32);
755 while !line.is_char_boundary(column as usize) {
756 match bias {
757 Bias::Left => column -= 1,
758 Bias::Right => column += 1,
759 }
760 }
761 return Point::new(row as u32, column);
762 }
763 }
764 unreachable!()
765 }
766
767 fn clip_point_utf16(&self, target: PointUtf16, bias: Bias) -> PointUtf16 {
768 for (row, line) in self.0.split('\n').enumerate() {
769 if row == target.row as usize {
770 let mut code_units = line.encode_utf16();
771 let mut column = code_units.by_ref().take(target.column as usize).count();
772 if char::decode_utf16(code_units).next().transpose().is_err() {
773 match bias {
774 Bias::Left => column -= 1,
775 Bias::Right => column += 1,
776 }
777 }
778 return PointUtf16::new(row as u32, column as u32);
779 }
780 }
781 unreachable!()
782 }
783
784 fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
785 let mut code_units = self.0.encode_utf16();
786 let mut offset = code_units.by_ref().take(target.0 as usize).count();
787 if char::decode_utf16(code_units).next().transpose().is_err() {
788 match bias {
789 Bias::Left => offset -= 1,
790 Bias::Right => offset += 1,
791 }
792 }
793 OffsetUtf16(offset)
794 }
795}
796
797impl sum_tree::Item for Chunk {
798 type Summary = ChunkSummary;
799
800 fn summary(&self) -> Self::Summary {
801 ChunkSummary::from(self.0.as_str())
802 }
803}
804
805#[derive(Clone, Debug, Default, Eq, PartialEq)]
806pub struct ChunkSummary {
807 text: TextSummary,
808 fingerprint: HashMatrix,
809}
810
811impl<'a> From<&'a str> for ChunkSummary {
812 fn from(text: &'a str) -> Self {
813 Self {
814 text: TextSummary::from(text),
815 fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
816 }
817 }
818}
819
820impl sum_tree::Summary for ChunkSummary {
821 type Context = ();
822
823 fn add_summary(&mut self, summary: &Self, _: &()) {
824 self.text += &summary.text;
825 self.fingerprint = self.fingerprint * summary.fingerprint;
826 }
827}
828
829#[derive(Clone, Debug, Default, Eq, PartialEq)]
830pub struct TextSummary {
831 pub len: usize,
832 pub len_utf16: OffsetUtf16,
833 pub lines: Point,
834 pub first_line_chars: u32,
835 pub last_line_chars: u32,
836 pub last_line_len_utf16: u32,
837 pub longest_row: u32,
838 pub longest_row_chars: u32,
839}
840
841impl TextSummary {
842 pub fn lines_utf16(&self) -> PointUtf16 {
843 PointUtf16 {
844 row: self.lines.row,
845 column: self.last_line_len_utf16,
846 }
847 }
848}
849
850impl<'a> From<&'a str> for TextSummary {
851 fn from(text: &'a str) -> Self {
852 let mut len_utf16 = OffsetUtf16(0);
853 let mut lines = Point::new(0, 0);
854 let mut first_line_chars = 0;
855 let mut last_line_chars = 0;
856 let mut last_line_len_utf16 = 0;
857 let mut longest_row = 0;
858 let mut longest_row_chars = 0;
859 for c in text.chars() {
860 len_utf16.0 += c.len_utf16();
861
862 if c == '\n' {
863 lines += Point::new(1, 0);
864 last_line_len_utf16 = 0;
865 last_line_chars = 0;
866 } else {
867 lines.column += c.len_utf8() as u32;
868 last_line_len_utf16 += c.len_utf16() as u32;
869 last_line_chars += 1;
870 }
871
872 if lines.row == 0 {
873 first_line_chars = last_line_chars;
874 }
875
876 if last_line_chars > longest_row_chars {
877 longest_row = lines.row;
878 longest_row_chars = last_line_chars;
879 }
880 }
881
882 TextSummary {
883 len: text.len(),
884 len_utf16,
885 lines,
886 first_line_chars,
887 last_line_chars,
888 last_line_len_utf16,
889 longest_row,
890 longest_row_chars,
891 }
892 }
893}
894
895impl sum_tree::Summary for TextSummary {
896 type Context = ();
897
898 fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
899 *self += summary;
900 }
901}
902
903impl std::ops::Add<Self> for TextSummary {
904 type Output = Self;
905
906 fn add(mut self, rhs: Self) -> Self::Output {
907 self.add_assign(&rhs);
908 self
909 }
910}
911
912impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
913 fn add_assign(&mut self, other: &'a Self) {
914 let joined_chars = self.last_line_chars + other.first_line_chars;
915 if joined_chars > self.longest_row_chars {
916 self.longest_row = self.lines.row;
917 self.longest_row_chars = joined_chars;
918 }
919 if other.longest_row_chars > self.longest_row_chars {
920 self.longest_row = self.lines.row + other.longest_row;
921 self.longest_row_chars = other.longest_row_chars;
922 }
923
924 if self.lines.row == 0 {
925 self.first_line_chars += other.first_line_chars;
926 }
927
928 if other.lines.row == 0 {
929 self.last_line_chars += other.first_line_chars;
930 self.last_line_len_utf16 += other.last_line_len_utf16;
931 } else {
932 self.last_line_chars = other.last_line_chars;
933 self.last_line_len_utf16 = other.last_line_len_utf16;
934 }
935
936 self.len += other.len;
937 self.len_utf16 += other.len_utf16;
938 self.lines += other.lines;
939 }
940}
941
942impl std::ops::AddAssign<Self> for TextSummary {
943 fn add_assign(&mut self, other: Self) {
944 *self += &other;
945 }
946}
947
948pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
949 fn from_text_summary(summary: &TextSummary) -> Self;
950 fn add_assign(&mut self, other: &Self);
951}
952
953impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
954 fn from_text_summary(summary: &TextSummary) -> Self {
955 (
956 D1::from_text_summary(summary),
957 D2::from_text_summary(summary),
958 )
959 }
960
961 fn add_assign(&mut self, other: &Self) {
962 self.0.add_assign(&other.0);
963 self.1.add_assign(&other.1);
964 }
965}
966
967impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
968 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
969 *self += &summary.text;
970 }
971}
972
973impl TextDimension for TextSummary {
974 fn from_text_summary(summary: &TextSummary) -> Self {
975 summary.clone()
976 }
977
978 fn add_assign(&mut self, other: &Self) {
979 *self += other;
980 }
981}
982
983impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
984 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
985 *self += summary.text.len;
986 }
987}
988
989impl TextDimension for usize {
990 fn from_text_summary(summary: &TextSummary) -> Self {
991 summary.len
992 }
993
994 fn add_assign(&mut self, other: &Self) {
995 *self += other;
996 }
997}
998
999impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1000 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1001 *self += summary.text.len_utf16;
1002 }
1003}
1004
1005impl TextDimension for OffsetUtf16 {
1006 fn from_text_summary(summary: &TextSummary) -> Self {
1007 summary.len_utf16
1008 }
1009
1010 fn add_assign(&mut self, other: &Self) {
1011 *self += other;
1012 }
1013}
1014
1015impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1016 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1017 *self += summary.text.lines;
1018 }
1019}
1020
1021impl TextDimension for Point {
1022 fn from_text_summary(summary: &TextSummary) -> Self {
1023 summary.lines
1024 }
1025
1026 fn add_assign(&mut self, other: &Self) {
1027 *self += other;
1028 }
1029}
1030
1031impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1032 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1033 *self += summary.text.lines_utf16();
1034 }
1035}
1036
1037impl TextDimension for PointUtf16 {
1038 fn from_text_summary(summary: &TextSummary) -> Self {
1039 summary.lines_utf16()
1040 }
1041
1042 fn add_assign(&mut self, other: &Self) {
1043 *self += other;
1044 }
1045}
1046
1047fn find_split_ix(text: &str) -> usize {
1048 let mut ix = text.len() / 2;
1049 while !text.is_char_boundary(ix) {
1050 if ix < 2 * CHUNK_BASE {
1051 ix += 1;
1052 } else {
1053 ix = (text.len() / 2) - 1;
1054 break;
1055 }
1056 }
1057 while !text.is_char_boundary(ix) {
1058 ix -= 1;
1059 }
1060
1061 debug_assert!(ix <= 2 * CHUNK_BASE);
1062 debug_assert!(text.len() - ix <= 2 * CHUNK_BASE);
1063 ix
1064}
1065
1066#[cfg(test)]
1067mod tests {
1068 use super::*;
1069 use crate::random_char_iter::RandomCharIter;
1070 use rand::prelude::*;
1071 use std::{cmp::Ordering, env, io::Read};
1072 use Bias::{Left, Right};
1073
1074 #[test]
1075 fn test_all_4_byte_chars() {
1076 let mut rope = Rope::new();
1077 let text = "🏀".repeat(256);
1078 rope.push(&text);
1079 assert_eq!(rope.text(), text);
1080 }
1081
1082 #[test]
1083 fn test_clip() {
1084 let rope = Rope::from("🧘");
1085
1086 assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1087 assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1088 assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1089
1090 assert_eq!(
1091 rope.clip_point(Point::new(0, 1), Bias::Left),
1092 Point::new(0, 0)
1093 );
1094 assert_eq!(
1095 rope.clip_point(Point::new(0, 1), Bias::Right),
1096 Point::new(0, 4)
1097 );
1098 assert_eq!(
1099 rope.clip_point(Point::new(0, 5), Bias::Right),
1100 Point::new(0, 4)
1101 );
1102
1103 assert_eq!(
1104 rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Left),
1105 PointUtf16::new(0, 0)
1106 );
1107 assert_eq!(
1108 rope.clip_point_utf16(PointUtf16::new(0, 1), Bias::Right),
1109 PointUtf16::new(0, 2)
1110 );
1111 assert_eq!(
1112 rope.clip_point_utf16(PointUtf16::new(0, 3), Bias::Right),
1113 PointUtf16::new(0, 2)
1114 );
1115
1116 assert_eq!(
1117 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1118 OffsetUtf16(0)
1119 );
1120 assert_eq!(
1121 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1122 OffsetUtf16(2)
1123 );
1124 assert_eq!(
1125 rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1126 OffsetUtf16(2)
1127 );
1128 }
1129
1130 #[gpui::test(iterations = 100)]
1131 fn test_random_rope(mut rng: StdRng) {
1132 let operations = env::var("OPERATIONS")
1133 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1134 .unwrap_or(10);
1135
1136 let mut expected = String::new();
1137 let mut actual = Rope::new();
1138 for _ in 0..operations {
1139 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1140 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1141 let len = rng.gen_range(0..=64);
1142 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1143
1144 let mut new_actual = Rope::new();
1145 let mut cursor = actual.cursor(0);
1146 new_actual.append(cursor.slice(start_ix));
1147 new_actual.push(&new_text);
1148 cursor.seek_forward(end_ix);
1149 new_actual.append(cursor.suffix());
1150 actual = new_actual;
1151
1152 expected.replace_range(start_ix..end_ix, &new_text);
1153
1154 assert_eq!(actual.text(), expected);
1155 log::info!("text: {:?}", expected);
1156
1157 for _ in 0..5 {
1158 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1159 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1160
1161 let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1162 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1163
1164 let mut actual_text = String::new();
1165 actual
1166 .bytes_in_range(start_ix..end_ix)
1167 .read_to_string(&mut actual_text)
1168 .unwrap();
1169 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1170
1171 assert_eq!(
1172 actual
1173 .reversed_chunks_in_range(start_ix..end_ix)
1174 .collect::<Vec<&str>>()
1175 .into_iter()
1176 .rev()
1177 .collect::<String>(),
1178 &expected[start_ix..end_ix]
1179 );
1180 }
1181
1182 let mut offset_utf16 = OffsetUtf16(0);
1183 let mut point = Point::new(0, 0);
1184 let mut point_utf16 = PointUtf16::new(0, 0);
1185 for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1186 assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1187 assert_eq!(
1188 actual.offset_to_point_utf16(ix),
1189 point_utf16,
1190 "offset_to_point_utf16({})",
1191 ix
1192 );
1193 assert_eq!(
1194 actual.point_to_offset(point),
1195 ix,
1196 "point_to_offset({:?})",
1197 point
1198 );
1199 assert_eq!(
1200 actual.point_utf16_to_offset(point_utf16),
1201 ix,
1202 "point_utf16_to_offset({:?})",
1203 point_utf16
1204 );
1205 assert_eq!(
1206 actual.offset_to_offset_utf16(ix),
1207 offset_utf16,
1208 "offset_to_offset_utf16({:?})",
1209 ix
1210 );
1211 assert_eq!(
1212 actual.offset_utf16_to_offset(offset_utf16),
1213 ix,
1214 "offset_utf16_to_offset({:?})",
1215 offset_utf16
1216 );
1217 if ch == '\n' {
1218 point += Point::new(1, 0);
1219 point_utf16 += PointUtf16::new(1, 0);
1220 } else {
1221 point.column += ch.len_utf8() as u32;
1222 point_utf16.column += ch.len_utf16() as u32;
1223 }
1224 offset_utf16.0 += ch.len_utf16();
1225 }
1226
1227 let mut offset_utf16 = OffsetUtf16(0);
1228 let mut point_utf16 = PointUtf16::zero();
1229 for unit in expected.encode_utf16() {
1230 let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1231 let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1232 assert!(right_offset >= left_offset);
1233 // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1234 actual.offset_utf16_to_offset(left_offset);
1235 actual.offset_utf16_to_offset(right_offset);
1236
1237 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1238 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1239 assert!(right_point >= left_point);
1240 // Ensure translating UTF-16 points to offsets doesn't panic.
1241 actual.point_utf16_to_offset(left_point);
1242 actual.point_utf16_to_offset(right_point);
1243
1244 offset_utf16.0 += 1;
1245 if unit == b'\n' as u16 {
1246 point_utf16 += PointUtf16::new(1, 0);
1247 } else {
1248 point_utf16 += PointUtf16::new(0, 1);
1249 }
1250 }
1251
1252 for _ in 0..5 {
1253 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1254 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1255 assert_eq!(
1256 actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1257 TextSummary::from(&expected[start_ix..end_ix])
1258 );
1259 }
1260
1261 let mut expected_longest_rows = Vec::new();
1262 let mut longest_line_len = -1_isize;
1263 for (row, line) in expected.split('\n').enumerate() {
1264 let row = row as u32;
1265 assert_eq!(
1266 actual.line_len(row),
1267 line.len() as u32,
1268 "invalid line len for row {}",
1269 row
1270 );
1271
1272 let line_char_count = line.chars().count() as isize;
1273 match line_char_count.cmp(&longest_line_len) {
1274 Ordering::Less => {}
1275 Ordering::Equal => expected_longest_rows.push(row),
1276 Ordering::Greater => {
1277 longest_line_len = line_char_count;
1278 expected_longest_rows.clear();
1279 expected_longest_rows.push(row);
1280 }
1281 }
1282 }
1283
1284 let longest_row = actual.summary().longest_row;
1285 assert!(
1286 expected_longest_rows.contains(&longest_row),
1287 "incorrect longest row {}. expected {:?} with length {}",
1288 longest_row,
1289 expected_longest_rows,
1290 longest_line_len,
1291 );
1292 }
1293 }
1294
1295 fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1296 while !text.is_char_boundary(offset) {
1297 match bias {
1298 Bias::Left => offset -= 1,
1299 Bias::Right => offset += 1,
1300 }
1301 }
1302 offset
1303 }
1304
1305 impl Rope {
1306 fn text(&self) -> String {
1307 let mut text = String::new();
1308 for chunk in self.chunks.cursor::<()>() {
1309 text.push_str(&chunk.0);
1310 }
1311 text
1312 }
1313 }
1314}