1mod offset_utf16;
2mod point;
3mod point_utf16;
4mod unclipped;
5
6use arrayvec::ArrayString;
7use bromberg_sl2::HashMatrix;
8use smallvec::SmallVec;
9use std::{
10 cmp, fmt, io, mem,
11 ops::{AddAssign, Range},
12 str,
13};
14use sum_tree::{Bias, Dimension, SumTree};
15use util::debug_panic;
16
17pub use offset_utf16::OffsetUtf16;
18pub use point::Point;
19pub use point_utf16::PointUtf16;
20pub use unclipped::Unclipped;
21
22#[cfg(test)]
23const CHUNK_BASE: usize = 6;
24
25#[cfg(not(test))]
26const CHUNK_BASE: usize = 16;
27
28/// Type alias to [`HashMatrix`], an implementation of a homomorphic hash function. Two [`Rope`] instances
29/// containing the same text will produce the same fingerprint. This hash function is special in that
30/// it allows us to hash individual chunks and aggregate them up the [`Rope`]'s tree, with the resulting
31/// hash being equivalent to hashing all the text contained in the [`Rope`] at once.
32pub type RopeFingerprint = HashMatrix;
33
34#[derive(Clone, Default)]
35pub struct Rope {
36 chunks: SumTree<Chunk>,
37}
38
39impl Rope {
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 pub fn text_fingerprint(text: &str) -> RopeFingerprint {
45 bromberg_sl2::hash_strict(text.as_bytes())
46 }
47
48 pub fn append(&mut self, rope: Rope) {
49 let mut chunks = rope.chunks.cursor::<()>();
50 chunks.next(&());
51 if let Some(chunk) = chunks.item() {
52 if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
53 || chunk.0.len() < CHUNK_BASE
54 {
55 self.push(&chunk.0);
56 chunks.next(&());
57 }
58 }
59
60 self.chunks.append(chunks.suffix(&()), &());
61 self.check_invariants();
62 }
63
64 pub fn replace(&mut self, range: Range<usize>, text: &str) {
65 let mut new_rope = Rope::new();
66 let mut cursor = self.cursor(0);
67 new_rope.append(cursor.slice(range.start));
68 cursor.seek_forward(range.end);
69 new_rope.push(text);
70 new_rope.append(cursor.suffix());
71 *self = new_rope;
72 }
73
74 pub fn slice(&self, range: Range<usize>) -> Rope {
75 let mut cursor = self.cursor(0);
76 cursor.seek_forward(range.start);
77 cursor.slice(range.end)
78 }
79
80 pub fn slice_rows(&self, range: Range<u32>) -> Rope {
81 // This would be more efficient with a forward advance after the first, but it's fine.
82 let start = self.point_to_offset(Point::new(range.start, 0));
83 let end = self.point_to_offset(Point::new(range.end, 0));
84 self.slice(start..end)
85 }
86
87 pub fn push(&mut self, mut text: &str) {
88 self.chunks.update_last(
89 |last_chunk| {
90 let split_ix = if last_chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
91 text.len()
92 } else {
93 let mut split_ix =
94 cmp::min(CHUNK_BASE.saturating_sub(last_chunk.0.len()), text.len());
95 while !text.is_char_boundary(split_ix) {
96 split_ix += 1;
97 }
98 split_ix
99 };
100
101 let (suffix, remainder) = text.split_at(split_ix);
102 last_chunk.0.push_str(suffix);
103 text = remainder;
104 },
105 &(),
106 );
107
108 let mut new_chunks = SmallVec::<[_; 16]>::new();
109 while !text.is_empty() {
110 let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
111 while !text.is_char_boundary(split_ix) {
112 split_ix -= 1;
113 }
114 let (chunk, remainder) = text.split_at(split_ix);
115 new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
116 text = remainder;
117 }
118
119 #[cfg(test)]
120 const PARALLEL_THRESHOLD: usize = 4;
121 #[cfg(not(test))]
122 const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
123
124 if new_chunks.len() >= PARALLEL_THRESHOLD {
125 self.chunks.par_extend(new_chunks.into_vec(), &());
126 } else {
127 self.chunks.extend(new_chunks, &());
128 }
129
130 self.check_invariants();
131 }
132
133 pub fn push_front(&mut self, text: &str) {
134 let suffix = mem::replace(self, Rope::from(text));
135 self.append(suffix);
136 }
137
138 fn check_invariants(&self) {
139 #[cfg(test)]
140 {
141 // Ensure all chunks except maybe the last one are not underflowing.
142 // Allow some wiggle room for multibyte characters at chunk boundaries.
143 let mut chunks = self.chunks.cursor::<()>().peekable();
144 while let Some(chunk) = chunks.next() {
145 if chunks.peek().is_some() {
146 assert!(chunk.0.len() + 3 >= CHUNK_BASE);
147 }
148 }
149 }
150 }
151
152 pub fn summary(&self) -> TextSummary {
153 self.chunks.summary().text.clone()
154 }
155
156 pub fn len(&self) -> usize {
157 self.chunks.extent(&())
158 }
159
160 pub fn is_empty(&self) -> bool {
161 self.len() == 0
162 }
163
164 pub fn max_point(&self) -> Point {
165 self.chunks.extent(&())
166 }
167
168 pub fn max_point_utf16(&self) -> PointUtf16 {
169 self.chunks.extent(&())
170 }
171
172 pub fn cursor(&self, offset: usize) -> Cursor {
173 Cursor::new(self, offset)
174 }
175
176 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
177 self.chars_at(0)
178 }
179
180 pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
181 self.chunks_in_range(start..self.len()).flat_map(str::chars)
182 }
183
184 pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
185 self.reversed_chunks_in_range(0..start)
186 .flat_map(|chunk| chunk.chars().rev())
187 }
188
189 pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
190 Bytes::new(self, range, false)
191 }
192
193 pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes {
194 Bytes::new(self, range, true)
195 }
196
197 pub fn chunks(&self) -> Chunks {
198 self.chunks_in_range(0..self.len())
199 }
200
201 pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
202 Chunks::new(self, range, false)
203 }
204
205 pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
206 Chunks::new(self, range, true)
207 }
208
209 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
210 if offset >= self.summary().len {
211 return self.summary().len_utf16;
212 }
213 let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
214 cursor.seek(&offset, Bias::Left, &());
215 let overshoot = offset - cursor.start().0;
216 cursor.start().1
217 + cursor.item().map_or(Default::default(), |chunk| {
218 chunk.offset_to_offset_utf16(overshoot)
219 })
220 }
221
222 pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
223 if offset >= self.summary().len_utf16 {
224 return self.summary().len;
225 }
226 let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
227 cursor.seek(&offset, Bias::Left, &());
228 let overshoot = offset - cursor.start().0;
229 cursor.start().1
230 + cursor.item().map_or(Default::default(), |chunk| {
231 chunk.offset_utf16_to_offset(overshoot)
232 })
233 }
234
235 pub fn offset_to_point(&self, offset: usize) -> Point {
236 if offset >= self.summary().len {
237 return self.summary().lines;
238 }
239 let mut cursor = self.chunks.cursor::<(usize, Point)>();
240 cursor.seek(&offset, Bias::Left, &());
241 let overshoot = offset - cursor.start().0;
242 cursor.start().1
243 + cursor
244 .item()
245 .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
246 }
247
248 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
249 if offset >= self.summary().len {
250 return self.summary().lines_utf16();
251 }
252 let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
253 cursor.seek(&offset, Bias::Left, &());
254 let overshoot = offset - cursor.start().0;
255 cursor.start().1
256 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
257 chunk.offset_to_point_utf16(overshoot)
258 })
259 }
260
261 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
262 if point >= self.summary().lines {
263 return self.summary().lines_utf16();
264 }
265 let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
266 cursor.seek(&point, Bias::Left, &());
267 let overshoot = point - cursor.start().0;
268 cursor.start().1
269 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
270 chunk.point_to_point_utf16(overshoot)
271 })
272 }
273
274 pub fn point_to_offset(&self, point: Point) -> usize {
275 if point >= self.summary().lines {
276 return self.summary().len;
277 }
278 let mut cursor = self.chunks.cursor::<(Point, usize)>();
279 cursor.seek(&point, Bias::Left, &());
280 let overshoot = point - cursor.start().0;
281 cursor.start().1
282 + cursor
283 .item()
284 .map_or(0, |chunk| chunk.point_to_offset(overshoot))
285 }
286
287 pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
288 self.point_utf16_to_offset_impl(point, false)
289 }
290
291 pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
292 self.point_utf16_to_offset_impl(point.0, true)
293 }
294
295 fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
296 if point >= self.summary().lines_utf16() {
297 return self.summary().len;
298 }
299 let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
300 cursor.seek(&point, Bias::Left, &());
301 let overshoot = point - cursor.start().0;
302 cursor.start().1
303 + cursor
304 .item()
305 .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clip))
306 }
307
308 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
309 if point.0 >= self.summary().lines_utf16() {
310 return self.summary().lines;
311 }
312 let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
313 cursor.seek(&point.0, Bias::Left, &());
314 let overshoot = Unclipped(point.0 - cursor.start().0);
315 cursor.start().1
316 + cursor.item().map_or(Point::zero(), |chunk| {
317 chunk.unclipped_point_utf16_to_point(overshoot)
318 })
319 }
320
321 pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
322 let mut cursor = self.chunks.cursor::<usize>();
323 cursor.seek(&offset, Bias::Left, &());
324 if let Some(chunk) = cursor.item() {
325 let mut ix = offset - cursor.start();
326 while !chunk.0.is_char_boundary(ix) {
327 match bias {
328 Bias::Left => {
329 ix -= 1;
330 offset -= 1;
331 }
332 Bias::Right => {
333 ix += 1;
334 offset += 1;
335 }
336 }
337 }
338 offset
339 } else {
340 self.summary().len
341 }
342 }
343
344 pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
345 let mut cursor = self.chunks.cursor::<OffsetUtf16>();
346 cursor.seek(&offset, Bias::Right, &());
347 if let Some(chunk) = cursor.item() {
348 let overshoot = offset - cursor.start();
349 *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
350 } else {
351 self.summary().len_utf16
352 }
353 }
354
355 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
356 let mut cursor = self.chunks.cursor::<Point>();
357 cursor.seek(&point, Bias::Right, &());
358 if let Some(chunk) = cursor.item() {
359 let overshoot = point - cursor.start();
360 *cursor.start() + chunk.clip_point(overshoot, bias)
361 } else {
362 self.summary().lines
363 }
364 }
365
366 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
367 let mut cursor = self.chunks.cursor::<PointUtf16>();
368 cursor.seek(&point.0, Bias::Right, &());
369 if let Some(chunk) = cursor.item() {
370 let overshoot = Unclipped(point.0 - cursor.start());
371 *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
372 } else {
373 self.summary().lines_utf16()
374 }
375 }
376
377 pub fn line_len(&self, row: u32) -> u32 {
378 self.clip_point(Point::new(row, u32::MAX), Bias::Left)
379 .column
380 }
381
382 pub fn fingerprint(&self) -> RopeFingerprint {
383 self.chunks.summary().fingerprint
384 }
385}
386
387impl<'a> From<&'a str> for Rope {
388 fn from(text: &'a str) -> Self {
389 let mut rope = Self::new();
390 rope.push(text);
391 rope
392 }
393}
394
395impl<'a> FromIterator<&'a str> for Rope {
396 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
397 let mut rope = Rope::new();
398 for chunk in iter {
399 rope.push(chunk);
400 }
401 rope
402 }
403}
404
405impl From<String> for Rope {
406 fn from(text: String) -> Self {
407 Rope::from(text.as_str())
408 }
409}
410
411impl fmt::Display for Rope {
412 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413 for chunk in self.chunks() {
414 write!(f, "{}", chunk)?;
415 }
416 Ok(())
417 }
418}
419
420impl fmt::Debug for Rope {
421 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422 use std::fmt::Write as _;
423
424 write!(f, "\"")?;
425 let mut format_string = String::new();
426 for chunk in self.chunks() {
427 write!(&mut format_string, "{:?}", chunk)?;
428 write!(f, "{}", &format_string[1..format_string.len() - 1])?;
429 format_string.clear();
430 }
431 write!(f, "\"")?;
432 Ok(())
433 }
434}
435
436pub struct Cursor<'a> {
437 rope: &'a Rope,
438 chunks: sum_tree::Cursor<'a, Chunk, usize>,
439 offset: usize,
440}
441
442impl<'a> Cursor<'a> {
443 pub fn new(rope: &'a Rope, offset: usize) -> Self {
444 let mut chunks = rope.chunks.cursor();
445 chunks.seek(&offset, Bias::Right, &());
446 Self {
447 rope,
448 chunks,
449 offset,
450 }
451 }
452
453 pub fn seek_forward(&mut self, end_offset: usize) {
454 debug_assert!(end_offset >= self.offset);
455
456 self.chunks.seek_forward(&end_offset, Bias::Right, &());
457 self.offset = end_offset;
458 }
459
460 pub fn slice(&mut self, end_offset: usize) -> Rope {
461 debug_assert!(
462 end_offset >= self.offset,
463 "cannot slice backwards from {} to {}",
464 self.offset,
465 end_offset
466 );
467
468 let mut slice = Rope::new();
469 if let Some(start_chunk) = self.chunks.item() {
470 let start_ix = self.offset - self.chunks.start();
471 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
472 slice.push(&start_chunk.0[start_ix..end_ix]);
473 }
474
475 if end_offset > self.chunks.end(&()) {
476 self.chunks.next(&());
477 slice.append(Rope {
478 chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
479 });
480 if let Some(end_chunk) = self.chunks.item() {
481 let end_ix = end_offset - self.chunks.start();
482 slice.push(&end_chunk.0[..end_ix]);
483 }
484 }
485
486 self.offset = end_offset;
487 slice
488 }
489
490 pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
491 debug_assert!(end_offset >= self.offset);
492
493 let mut summary = D::default();
494 if let Some(start_chunk) = self.chunks.item() {
495 let start_ix = self.offset - self.chunks.start();
496 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
497 summary.add_assign(&D::from_text_summary(&TextSummary::from(
498 &start_chunk.0[start_ix..end_ix],
499 )));
500 }
501
502 if end_offset > self.chunks.end(&()) {
503 self.chunks.next(&());
504 summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
505 if let Some(end_chunk) = self.chunks.item() {
506 let end_ix = end_offset - self.chunks.start();
507 summary.add_assign(&D::from_text_summary(&TextSummary::from(
508 &end_chunk.0[..end_ix],
509 )));
510 }
511 }
512
513 self.offset = end_offset;
514 summary
515 }
516
517 pub fn suffix(mut self) -> Rope {
518 self.slice(self.rope.chunks.extent(&()))
519 }
520
521 pub fn offset(&self) -> usize {
522 self.offset
523 }
524}
525
526pub struct Chunks<'a> {
527 chunks: sum_tree::Cursor<'a, Chunk, usize>,
528 range: Range<usize>,
529 reversed: bool,
530}
531
532impl<'a> Chunks<'a> {
533 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
534 let mut chunks = rope.chunks.cursor();
535 if reversed {
536 chunks.seek(&range.end, Bias::Left, &());
537 } else {
538 chunks.seek(&range.start, Bias::Right, &());
539 }
540 Self {
541 chunks,
542 range,
543 reversed,
544 }
545 }
546
547 pub fn offset(&self) -> usize {
548 if self.reversed {
549 self.range.end.min(self.chunks.end(&()))
550 } else {
551 self.range.start.max(*self.chunks.start())
552 }
553 }
554
555 pub fn seek(&mut self, offset: usize) {
556 let bias = if self.reversed {
557 Bias::Left
558 } else {
559 Bias::Right
560 };
561
562 if offset >= self.chunks.end(&()) {
563 self.chunks.seek_forward(&offset, bias, &());
564 } else {
565 self.chunks.seek(&offset, bias, &());
566 }
567
568 if self.reversed {
569 self.range.end = offset;
570 } else {
571 self.range.start = offset;
572 }
573 }
574
575 pub fn peek(&self) -> Option<&'a str> {
576 let chunk = self.chunks.item()?;
577 if self.reversed && self.range.start >= self.chunks.end(&()) {
578 return None;
579 }
580 let chunk_start = *self.chunks.start();
581 if self.range.end <= chunk_start {
582 return None;
583 }
584
585 let start = self.range.start.saturating_sub(chunk_start);
586 let end = self.range.end - chunk_start;
587 Some(&chunk.0[start..chunk.0.len().min(end)])
588 }
589}
590
591impl<'a> Iterator for Chunks<'a> {
592 type Item = &'a str;
593
594 fn next(&mut self) -> Option<Self::Item> {
595 let result = self.peek();
596 if result.is_some() {
597 if self.reversed {
598 self.chunks.prev(&());
599 } else {
600 self.chunks.next(&());
601 }
602 }
603 result
604 }
605}
606
607pub struct Bytes<'a> {
608 chunks: sum_tree::Cursor<'a, Chunk, usize>,
609 range: Range<usize>,
610 reversed: bool,
611}
612
613impl<'a> Bytes<'a> {
614 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
615 let mut chunks = rope.chunks.cursor();
616 if reversed {
617 chunks.seek(&range.end, Bias::Left, &());
618 } else {
619 chunks.seek(&range.start, Bias::Right, &());
620 }
621 Self {
622 chunks,
623 range,
624 reversed,
625 }
626 }
627
628 pub fn peek(&self) -> Option<&'a [u8]> {
629 let chunk = self.chunks.item()?;
630 if self.reversed && self.range.start >= self.chunks.end(&()) {
631 return None;
632 }
633 let chunk_start = *self.chunks.start();
634 if self.range.end <= chunk_start {
635 return None;
636 }
637 let start = self.range.start.saturating_sub(chunk_start);
638 let end = self.range.end - chunk_start;
639 Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
640 }
641}
642
643impl<'a> Iterator for Bytes<'a> {
644 type Item = &'a [u8];
645
646 fn next(&mut self) -> Option<Self::Item> {
647 let result = self.peek();
648 if result.is_some() {
649 if self.reversed {
650 self.chunks.prev(&());
651 } else {
652 self.chunks.next(&());
653 }
654 }
655 result
656 }
657}
658
659impl<'a> io::Read for Bytes<'a> {
660 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
661 if let Some(chunk) = self.peek() {
662 let len = cmp::min(buf.len(), chunk.len());
663 if self.reversed {
664 buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
665 buf[..len].reverse();
666 self.range.end -= len;
667 } else {
668 buf[..len].copy_from_slice(&chunk[..len]);
669 self.range.start += len;
670 }
671
672 if len == chunk.len() {
673 if self.reversed {
674 self.chunks.prev(&());
675 } else {
676 self.chunks.next(&());
677 }
678 }
679 Ok(len)
680 } else {
681 Ok(0)
682 }
683 }
684}
685
686#[derive(Clone, Debug, Default)]
687struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
688
689impl Chunk {
690 fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
691 let mut offset = 0;
692 let mut offset_utf16 = OffsetUtf16(0);
693 for ch in self.0.chars() {
694 if offset >= target {
695 break;
696 }
697
698 offset += ch.len_utf8();
699 offset_utf16.0 += ch.len_utf16();
700 }
701 offset_utf16
702 }
703
704 fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
705 let mut offset_utf16 = OffsetUtf16(0);
706 let mut offset = 0;
707 for ch in self.0.chars() {
708 if offset_utf16 >= target {
709 break;
710 }
711
712 offset += ch.len_utf8();
713 offset_utf16.0 += ch.len_utf16();
714 }
715 offset
716 }
717
718 fn offset_to_point(&self, target: usize) -> Point {
719 let mut offset = 0;
720 let mut point = Point::new(0, 0);
721 for ch in self.0.chars() {
722 if offset >= target {
723 break;
724 }
725
726 if ch == '\n' {
727 point.row += 1;
728 point.column = 0;
729 } else {
730 point.column += ch.len_utf8() as u32;
731 }
732 offset += ch.len_utf8();
733 }
734 point
735 }
736
737 fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
738 let mut offset = 0;
739 let mut point = PointUtf16::new(0, 0);
740 for ch in self.0.chars() {
741 if offset >= target {
742 break;
743 }
744
745 if ch == '\n' {
746 point.row += 1;
747 point.column = 0;
748 } else {
749 point.column += ch.len_utf16() as u32;
750 }
751 offset += ch.len_utf8();
752 }
753 point
754 }
755
756 fn point_to_offset(&self, target: Point) -> usize {
757 let mut offset = 0;
758 let mut point = Point::new(0, 0);
759
760 for ch in self.0.chars() {
761 if point >= target {
762 if point > target {
763 debug_panic!("point {target:?} is inside of character {ch:?}");
764 }
765 break;
766 }
767
768 if ch == '\n' {
769 point.row += 1;
770 point.column = 0;
771
772 if point.row > target.row {
773 debug_panic!(
774 "point {target:?} is beyond the end of a line with length {}",
775 point.column
776 );
777 break;
778 }
779 } else {
780 point.column += ch.len_utf8() as u32;
781 }
782
783 offset += ch.len_utf8();
784 }
785
786 offset
787 }
788
789 fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
790 let mut point = Point::zero();
791 let mut point_utf16 = PointUtf16::new(0, 0);
792 for ch in self.0.chars() {
793 if point >= target {
794 break;
795 }
796
797 if ch == '\n' {
798 point_utf16.row += 1;
799 point_utf16.column = 0;
800 point.row += 1;
801 point.column = 0;
802 } else {
803 point_utf16.column += ch.len_utf16() as u32;
804 point.column += ch.len_utf8() as u32;
805 }
806 }
807 point_utf16
808 }
809
810 fn point_utf16_to_offset(&self, target: PointUtf16, clip: bool) -> usize {
811 let mut offset = 0;
812 let mut point = PointUtf16::new(0, 0);
813
814 for ch in self.0.chars() {
815 if point == target {
816 break;
817 }
818
819 if ch == '\n' {
820 point.row += 1;
821 point.column = 0;
822
823 if point.row > target.row {
824 if !clip {
825 debug_panic!(
826 "point {target:?} is beyond the end of a line with length {}",
827 point.column
828 );
829 }
830 // Return the offset of the newline
831 return offset;
832 }
833 } else {
834 point.column += ch.len_utf16() as u32;
835 }
836
837 if point > target {
838 if !clip {
839 debug_panic!("point {target:?} is inside of codepoint {ch:?}");
840 }
841 // Return the offset of the codepoint which we have landed within, bias left
842 return offset;
843 }
844
845 offset += ch.len_utf8();
846 }
847
848 offset
849 }
850
851 fn unclipped_point_utf16_to_point(&self, target: Unclipped<PointUtf16>) -> Point {
852 let mut point = Point::zero();
853 let mut point_utf16 = PointUtf16::zero();
854
855 for ch in self.0.chars() {
856 if point_utf16 == target.0 {
857 break;
858 }
859
860 if point_utf16 > target.0 {
861 // If the point is past the end of a line or inside of a code point,
862 // return the last valid point before the target.
863 return point;
864 }
865
866 if ch == '\n' {
867 point_utf16 += PointUtf16::new(1, 0);
868 point += Point::new(1, 0);
869 } else {
870 point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
871 point += Point::new(0, ch.len_utf8() as u32);
872 }
873 }
874
875 point
876 }
877
878 fn clip_point(&self, target: Point, bias: Bias) -> Point {
879 for (row, line) in self.0.split('\n').enumerate() {
880 if row == target.row as usize {
881 let mut column = target.column.min(line.len() as u32);
882 while !line.is_char_boundary(column as usize) {
883 match bias {
884 Bias::Left => column -= 1,
885 Bias::Right => column += 1,
886 }
887 }
888 return Point::new(row as u32, column);
889 }
890 }
891 unreachable!()
892 }
893
894 fn clip_point_utf16(&self, target: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
895 for (row, line) in self.0.split('\n').enumerate() {
896 if row == target.0.row as usize {
897 let mut code_units = line.encode_utf16();
898 let mut column = code_units.by_ref().take(target.0.column as usize).count();
899 if char::decode_utf16(code_units).next().transpose().is_err() {
900 match bias {
901 Bias::Left => column -= 1,
902 Bias::Right => column += 1,
903 }
904 }
905 return PointUtf16::new(row as u32, column as u32);
906 }
907 }
908 unreachable!()
909 }
910
911 fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
912 let mut code_units = self.0.encode_utf16();
913 let mut offset = code_units.by_ref().take(target.0).count();
914 if char::decode_utf16(code_units).next().transpose().is_err() {
915 match bias {
916 Bias::Left => offset -= 1,
917 Bias::Right => offset += 1,
918 }
919 }
920 OffsetUtf16(offset)
921 }
922}
923
924impl sum_tree::Item for Chunk {
925 type Summary = ChunkSummary;
926
927 fn summary(&self) -> Self::Summary {
928 ChunkSummary::from(self.0.as_str())
929 }
930}
931
932#[derive(Clone, Debug, Default, Eq, PartialEq)]
933pub struct ChunkSummary {
934 text: TextSummary,
935 fingerprint: RopeFingerprint,
936}
937
938impl<'a> From<&'a str> for ChunkSummary {
939 fn from(text: &'a str) -> Self {
940 Self {
941 text: TextSummary::from(text),
942 fingerprint: Rope::text_fingerprint(text),
943 }
944 }
945}
946
947impl sum_tree::Summary for ChunkSummary {
948 type Context = ();
949
950 fn add_summary(&mut self, summary: &Self, _: &()) {
951 self.text += &summary.text;
952 self.fingerprint = self.fingerprint * summary.fingerprint;
953 }
954}
955
956#[derive(Clone, Debug, Default, Eq, PartialEq)]
957pub struct TextSummary {
958 pub len: usize,
959 pub len_utf16: OffsetUtf16,
960 pub lines: Point,
961 pub first_line_chars: u32,
962 pub last_line_chars: u32,
963 pub last_line_len_utf16: u32,
964 pub longest_row: u32,
965 pub longest_row_chars: u32,
966}
967
968impl TextSummary {
969 pub fn lines_utf16(&self) -> PointUtf16 {
970 PointUtf16 {
971 row: self.lines.row,
972 column: self.last_line_len_utf16,
973 }
974 }
975}
976
977impl<'a> From<&'a str> for TextSummary {
978 fn from(text: &'a str) -> Self {
979 let mut len_utf16 = OffsetUtf16(0);
980 let mut lines = Point::new(0, 0);
981 let mut first_line_chars = 0;
982 let mut last_line_chars = 0;
983 let mut last_line_len_utf16 = 0;
984 let mut longest_row = 0;
985 let mut longest_row_chars = 0;
986 for c in text.chars() {
987 len_utf16.0 += c.len_utf16();
988
989 if c == '\n' {
990 lines += Point::new(1, 0);
991 last_line_len_utf16 = 0;
992 last_line_chars = 0;
993 } else {
994 lines.column += c.len_utf8() as u32;
995 last_line_len_utf16 += c.len_utf16() as u32;
996 last_line_chars += 1;
997 }
998
999 if lines.row == 0 {
1000 first_line_chars = last_line_chars;
1001 }
1002
1003 if last_line_chars > longest_row_chars {
1004 longest_row = lines.row;
1005 longest_row_chars = last_line_chars;
1006 }
1007 }
1008
1009 TextSummary {
1010 len: text.len(),
1011 len_utf16,
1012 lines,
1013 first_line_chars,
1014 last_line_chars,
1015 last_line_len_utf16,
1016 longest_row,
1017 longest_row_chars,
1018 }
1019 }
1020}
1021
1022impl sum_tree::Summary for TextSummary {
1023 type Context = ();
1024
1025 fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
1026 *self += summary;
1027 }
1028}
1029
1030impl std::ops::Add<Self> for TextSummary {
1031 type Output = Self;
1032
1033 fn add(mut self, rhs: Self) -> Self::Output {
1034 AddAssign::add_assign(&mut self, &rhs);
1035 self
1036 }
1037}
1038
1039impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
1040 fn add_assign(&mut self, other: &'a Self) {
1041 let joined_chars = self.last_line_chars + other.first_line_chars;
1042 if joined_chars > self.longest_row_chars {
1043 self.longest_row = self.lines.row;
1044 self.longest_row_chars = joined_chars;
1045 }
1046 if other.longest_row_chars > self.longest_row_chars {
1047 self.longest_row = self.lines.row + other.longest_row;
1048 self.longest_row_chars = other.longest_row_chars;
1049 }
1050
1051 if self.lines.row == 0 {
1052 self.first_line_chars += other.first_line_chars;
1053 }
1054
1055 if other.lines.row == 0 {
1056 self.last_line_chars += other.first_line_chars;
1057 self.last_line_len_utf16 += other.last_line_len_utf16;
1058 } else {
1059 self.last_line_chars = other.last_line_chars;
1060 self.last_line_len_utf16 = other.last_line_len_utf16;
1061 }
1062
1063 self.len += other.len;
1064 self.len_utf16 += other.len_utf16;
1065 self.lines += other.lines;
1066 }
1067}
1068
1069impl std::ops::AddAssign<Self> for TextSummary {
1070 fn add_assign(&mut self, other: Self) {
1071 *self += &other;
1072 }
1073}
1074
1075pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
1076 fn from_text_summary(summary: &TextSummary) -> Self;
1077 fn add_assign(&mut self, other: &Self);
1078}
1079
1080impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
1081 fn from_text_summary(summary: &TextSummary) -> Self {
1082 (
1083 D1::from_text_summary(summary),
1084 D2::from_text_summary(summary),
1085 )
1086 }
1087
1088 fn add_assign(&mut self, other: &Self) {
1089 self.0.add_assign(&other.0);
1090 self.1.add_assign(&other.1);
1091 }
1092}
1093
1094impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1095 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1096 *self += &summary.text;
1097 }
1098}
1099
1100impl TextDimension for TextSummary {
1101 fn from_text_summary(summary: &TextSummary) -> Self {
1102 summary.clone()
1103 }
1104
1105 fn add_assign(&mut self, other: &Self) {
1106 *self += other;
1107 }
1108}
1109
1110impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1111 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1112 *self += summary.text.len;
1113 }
1114}
1115
1116impl TextDimension for usize {
1117 fn from_text_summary(summary: &TextSummary) -> Self {
1118 summary.len
1119 }
1120
1121 fn add_assign(&mut self, other: &Self) {
1122 *self += other;
1123 }
1124}
1125
1126impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1127 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1128 *self += summary.text.len_utf16;
1129 }
1130}
1131
1132impl TextDimension for OffsetUtf16 {
1133 fn from_text_summary(summary: &TextSummary) -> Self {
1134 summary.len_utf16
1135 }
1136
1137 fn add_assign(&mut self, other: &Self) {
1138 *self += other;
1139 }
1140}
1141
1142impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1143 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1144 *self += summary.text.lines;
1145 }
1146}
1147
1148impl TextDimension for Point {
1149 fn from_text_summary(summary: &TextSummary) -> Self {
1150 summary.lines
1151 }
1152
1153 fn add_assign(&mut self, other: &Self) {
1154 *self += other;
1155 }
1156}
1157
1158impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1159 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1160 *self += summary.text.lines_utf16();
1161 }
1162}
1163
1164impl TextDimension for PointUtf16 {
1165 fn from_text_summary(summary: &TextSummary) -> Self {
1166 summary.lines_utf16()
1167 }
1168
1169 fn add_assign(&mut self, other: &Self) {
1170 *self += other;
1171 }
1172}
1173
1174#[cfg(test)]
1175mod tests {
1176 use super::*;
1177 use rand::prelude::*;
1178 use std::{cmp::Ordering, env, io::Read};
1179 use util::RandomCharIter;
1180 use Bias::{Left, Right};
1181
1182 #[test]
1183 fn test_all_4_byte_chars() {
1184 let mut rope = Rope::new();
1185 let text = "🏀".repeat(256);
1186 rope.push(&text);
1187 assert_eq!(rope.text(), text);
1188 }
1189
1190 #[test]
1191 fn test_clip() {
1192 let rope = Rope::from("🧘");
1193
1194 assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1195 assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1196 assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1197
1198 assert_eq!(
1199 rope.clip_point(Point::new(0, 1), Bias::Left),
1200 Point::new(0, 0)
1201 );
1202 assert_eq!(
1203 rope.clip_point(Point::new(0, 1), Bias::Right),
1204 Point::new(0, 4)
1205 );
1206 assert_eq!(
1207 rope.clip_point(Point::new(0, 5), Bias::Right),
1208 Point::new(0, 4)
1209 );
1210
1211 assert_eq!(
1212 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1213 PointUtf16::new(0, 0)
1214 );
1215 assert_eq!(
1216 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1217 PointUtf16::new(0, 2)
1218 );
1219 assert_eq!(
1220 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1221 PointUtf16::new(0, 2)
1222 );
1223
1224 assert_eq!(
1225 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1226 OffsetUtf16(0)
1227 );
1228 assert_eq!(
1229 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1230 OffsetUtf16(2)
1231 );
1232 assert_eq!(
1233 rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1234 OffsetUtf16(2)
1235 );
1236 }
1237
1238 #[gpui::test(iterations = 100)]
1239 fn test_random_rope(mut rng: StdRng) {
1240 let operations = env::var("OPERATIONS")
1241 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1242 .unwrap_or(10);
1243
1244 let mut expected = String::new();
1245 let mut actual = Rope::new();
1246 for _ in 0..operations {
1247 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1248 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1249 let len = rng.gen_range(0..=64);
1250 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1251
1252 let mut new_actual = Rope::new();
1253 let mut cursor = actual.cursor(0);
1254 new_actual.append(cursor.slice(start_ix));
1255 new_actual.push(&new_text);
1256 cursor.seek_forward(end_ix);
1257 new_actual.append(cursor.suffix());
1258 actual = new_actual;
1259
1260 expected.replace_range(start_ix..end_ix, &new_text);
1261
1262 assert_eq!(actual.text(), expected);
1263 log::info!("text: {:?}", expected);
1264
1265 for _ in 0..5 {
1266 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1267 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1268
1269 let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1270 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1271
1272 let mut actual_text = String::new();
1273 actual
1274 .bytes_in_range(start_ix..end_ix)
1275 .read_to_string(&mut actual_text)
1276 .unwrap();
1277 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1278
1279 assert_eq!(
1280 actual
1281 .reversed_chunks_in_range(start_ix..end_ix)
1282 .collect::<Vec<&str>>()
1283 .into_iter()
1284 .rev()
1285 .collect::<String>(),
1286 &expected[start_ix..end_ix]
1287 );
1288 }
1289
1290 let mut offset_utf16 = OffsetUtf16(0);
1291 let mut point = Point::new(0, 0);
1292 let mut point_utf16 = PointUtf16::new(0, 0);
1293 for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1294 assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1295 assert_eq!(
1296 actual.offset_to_point_utf16(ix),
1297 point_utf16,
1298 "offset_to_point_utf16({})",
1299 ix
1300 );
1301 assert_eq!(
1302 actual.point_to_offset(point),
1303 ix,
1304 "point_to_offset({:?})",
1305 point
1306 );
1307 assert_eq!(
1308 actual.point_utf16_to_offset(point_utf16),
1309 ix,
1310 "point_utf16_to_offset({:?})",
1311 point_utf16
1312 );
1313 assert_eq!(
1314 actual.offset_to_offset_utf16(ix),
1315 offset_utf16,
1316 "offset_to_offset_utf16({:?})",
1317 ix
1318 );
1319 assert_eq!(
1320 actual.offset_utf16_to_offset(offset_utf16),
1321 ix,
1322 "offset_utf16_to_offset({:?})",
1323 offset_utf16
1324 );
1325 if ch == '\n' {
1326 point += Point::new(1, 0);
1327 point_utf16 += PointUtf16::new(1, 0);
1328 } else {
1329 point.column += ch.len_utf8() as u32;
1330 point_utf16.column += ch.len_utf16() as u32;
1331 }
1332 offset_utf16.0 += ch.len_utf16();
1333 }
1334
1335 let mut offset_utf16 = OffsetUtf16(0);
1336 let mut point_utf16 = Unclipped(PointUtf16::zero());
1337 for unit in expected.encode_utf16() {
1338 let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1339 let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1340 assert!(right_offset >= left_offset);
1341 // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1342 actual.offset_utf16_to_offset(left_offset);
1343 actual.offset_utf16_to_offset(right_offset);
1344
1345 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1346 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1347 assert!(right_point >= left_point);
1348 // Ensure translating valid UTF-16 points to offsets doesn't panic.
1349 actual.point_utf16_to_offset(left_point);
1350 actual.point_utf16_to_offset(right_point);
1351
1352 offset_utf16.0 += 1;
1353 if unit == b'\n' as u16 {
1354 point_utf16.0 += PointUtf16::new(1, 0);
1355 } else {
1356 point_utf16.0 += PointUtf16::new(0, 1);
1357 }
1358 }
1359
1360 for _ in 0..5 {
1361 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1362 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1363 assert_eq!(
1364 actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1365 TextSummary::from(&expected[start_ix..end_ix])
1366 );
1367 }
1368
1369 let mut expected_longest_rows = Vec::new();
1370 let mut longest_line_len = -1_isize;
1371 for (row, line) in expected.split('\n').enumerate() {
1372 let row = row as u32;
1373 assert_eq!(
1374 actual.line_len(row),
1375 line.len() as u32,
1376 "invalid line len for row {}",
1377 row
1378 );
1379
1380 let line_char_count = line.chars().count() as isize;
1381 match line_char_count.cmp(&longest_line_len) {
1382 Ordering::Less => {}
1383 Ordering::Equal => expected_longest_rows.push(row),
1384 Ordering::Greater => {
1385 longest_line_len = line_char_count;
1386 expected_longest_rows.clear();
1387 expected_longest_rows.push(row);
1388 }
1389 }
1390 }
1391
1392 let longest_row = actual.summary().longest_row;
1393 assert!(
1394 expected_longest_rows.contains(&longest_row),
1395 "incorrect longest row {}. expected {:?} with length {}",
1396 longest_row,
1397 expected_longest_rows,
1398 longest_line_len,
1399 );
1400 }
1401 }
1402
1403 fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1404 while !text.is_char_boundary(offset) {
1405 match bias {
1406 Bias::Left => offset -= 1,
1407 Bias::Right => offset += 1,
1408 }
1409 }
1410 offset
1411 }
1412
1413 impl Rope {
1414 fn text(&self) -> String {
1415 let mut text = String::new();
1416 for chunk in self.chunks.cursor::<()>() {
1417 text.push_str(&chunk.0);
1418 }
1419 text
1420 }
1421 }
1422}