1mod offset_utf16;
2mod point;
3mod point_utf16;
4mod unclipped;
5
6use arrayvec::ArrayString;
7use bromberg_sl2::HashMatrix;
8use smallvec::SmallVec;
9use std::{
10 cmp, fmt, io, mem,
11 ops::{AddAssign, Range},
12 str,
13};
14use sum_tree::{Bias, Dimension, SumTree};
15use util::debug_panic;
16
17pub use offset_utf16::OffsetUtf16;
18pub use point::Point;
19pub use point_utf16::PointUtf16;
20pub use unclipped::Unclipped;
21
22#[cfg(test)]
23const CHUNK_BASE: usize = 6;
24
25#[cfg(not(test))]
26const CHUNK_BASE: usize = 16;
27
28/// Type alias to [`HashMatrix`], an implementation of a homomorphic hash function. Two [`Rope`] instances
29/// containing the same text will produce the same fingerprint. This hash function is special in that
30/// it allows us to hash individual chunks and aggregate them up the [`Rope`]'s tree, with the resulting
31/// hash being equivalent to hashing all the text contained in the [`Rope`] at once.
32pub type RopeFingerprint = HashMatrix;
33
34#[derive(Clone, Default)]
35pub struct Rope {
36 chunks: SumTree<Chunk>,
37}
38
39impl Rope {
40 pub fn new() -> Self {
41 Self::default()
42 }
43
44 pub fn text_fingerprint(text: &str) -> RopeFingerprint {
45 bromberg_sl2::hash_strict(text.as_bytes())
46 }
47
48 pub fn append(&mut self, rope: Rope) {
49 let mut chunks = rope.chunks.cursor::<()>();
50 chunks.next(&());
51 if let Some(chunk) = chunks.item() {
52 if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
53 || chunk.0.len() < CHUNK_BASE
54 {
55 self.push(&chunk.0);
56 chunks.next(&());
57 }
58 }
59
60 self.chunks.append(chunks.suffix(&()), &());
61 self.check_invariants();
62 }
63
64 pub fn replace(&mut self, range: Range<usize>, text: &str) {
65 let mut new_rope = Rope::new();
66 let mut cursor = self.cursor(0);
67 new_rope.append(cursor.slice(range.start));
68 cursor.seek_forward(range.end);
69 new_rope.push(text);
70 new_rope.append(cursor.suffix());
71 *self = new_rope;
72 }
73
74 pub fn slice(&self, range: Range<usize>) -> Rope {
75 let mut cursor = self.cursor(0);
76 cursor.seek_forward(range.start);
77 cursor.slice(range.end)
78 }
79
80 pub fn slice_rows(&self, range: Range<u32>) -> Rope {
81 // This would be more efficient with a forward advance after the first, but it's fine.
82 let start = self.point_to_offset(Point::new(range.start, 0));
83 let end = self.point_to_offset(Point::new(range.end, 0));
84 self.slice(start..end)
85 }
86
87 pub fn push(&mut self, mut text: &str) {
88 self.chunks.update_last(
89 |last_chunk| {
90 let split_ix = if last_chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
91 text.len()
92 } else {
93 let mut split_ix =
94 cmp::min(CHUNK_BASE.saturating_sub(last_chunk.0.len()), text.len());
95 while !text.is_char_boundary(split_ix) {
96 split_ix += 1;
97 }
98 split_ix
99 };
100
101 let (suffix, remainder) = text.split_at(split_ix);
102 last_chunk.0.push_str(suffix);
103 text = remainder;
104 },
105 &(),
106 );
107
108 let mut new_chunks = SmallVec::<[_; 16]>::new();
109 while !text.is_empty() {
110 let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
111 while !text.is_char_boundary(split_ix) {
112 split_ix -= 1;
113 }
114 let (chunk, remainder) = text.split_at(split_ix);
115 new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
116 text = remainder;
117 }
118
119 #[cfg(test)]
120 const PARALLEL_THRESHOLD: usize = 4;
121 #[cfg(not(test))]
122 const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
123
124 if new_chunks.len() >= PARALLEL_THRESHOLD {
125 self.chunks.par_extend(new_chunks.into_vec(), &());
126 } else {
127 self.chunks.extend(new_chunks, &());
128 }
129
130 self.check_invariants();
131 }
132
133 pub fn push_front(&mut self, text: &str) {
134 let suffix = mem::replace(self, Rope::from(text));
135 self.append(suffix);
136 }
137
138 fn check_invariants(&self) {
139 #[cfg(test)]
140 {
141 // Ensure all chunks except maybe the last one are not underflowing.
142 // Allow some wiggle room for multibyte characters at chunk boundaries.
143 let mut chunks = self.chunks.cursor::<()>().peekable();
144 while let Some(chunk) = chunks.next() {
145 if chunks.peek().is_some() {
146 assert!(chunk.0.len() + 3 >= CHUNK_BASE);
147 }
148 }
149 }
150 }
151
152 pub fn summary(&self) -> TextSummary {
153 self.chunks.summary().text.clone()
154 }
155
156 pub fn len(&self) -> usize {
157 self.chunks.extent(&())
158 }
159
160 pub fn is_empty(&self) -> bool {
161 self.len() == 0
162 }
163
164 pub fn max_point(&self) -> Point {
165 self.chunks.extent(&())
166 }
167
168 pub fn max_point_utf16(&self) -> PointUtf16 {
169 self.chunks.extent(&())
170 }
171
172 pub fn cursor(&self, offset: usize) -> Cursor {
173 Cursor::new(self, offset)
174 }
175
176 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
177 self.chars_at(0)
178 }
179
180 pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
181 self.chunks_in_range(start..self.len()).flat_map(str::chars)
182 }
183
184 pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
185 self.reversed_chunks_in_range(0..start)
186 .flat_map(|chunk| chunk.chars().rev())
187 }
188
189 pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes {
190 Bytes::new(self, range, false)
191 }
192
193 pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes {
194 Bytes::new(self, range, true)
195 }
196
197 pub fn chunks(&self) -> Chunks {
198 self.chunks_in_range(0..self.len())
199 }
200
201 pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks {
202 Chunks::new(self, range, false)
203 }
204
205 pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks {
206 Chunks::new(self, range, true)
207 }
208
209 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
210 if offset >= self.summary().len {
211 return self.summary().len_utf16;
212 }
213 let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
214 cursor.seek(&offset, Bias::Left, &());
215 let overshoot = offset - cursor.start().0;
216 cursor.start().1
217 + cursor.item().map_or(Default::default(), |chunk| {
218 chunk.offset_to_offset_utf16(overshoot)
219 })
220 }
221
222 pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
223 if offset >= self.summary().len_utf16 {
224 return self.summary().len;
225 }
226 let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
227 cursor.seek(&offset, Bias::Left, &());
228 let overshoot = offset - cursor.start().0;
229 cursor.start().1
230 + cursor.item().map_or(Default::default(), |chunk| {
231 chunk.offset_utf16_to_offset(overshoot)
232 })
233 }
234
235 pub fn offset_to_point(&self, offset: usize) -> Point {
236 if offset >= self.summary().len {
237 return self.summary().lines;
238 }
239 let mut cursor = self.chunks.cursor::<(usize, Point)>();
240 cursor.seek(&offset, Bias::Left, &());
241 let overshoot = offset - cursor.start().0;
242 cursor.start().1
243 + cursor
244 .item()
245 .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
246 }
247
248 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
249 if offset >= self.summary().len {
250 return self.summary().lines_utf16();
251 }
252 let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
253 cursor.seek(&offset, Bias::Left, &());
254 let overshoot = offset - cursor.start().0;
255 cursor.start().1
256 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
257 chunk.offset_to_point_utf16(overshoot)
258 })
259 }
260
261 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
262 if point >= self.summary().lines {
263 return self.summary().lines_utf16();
264 }
265 let mut cursor = self.chunks.cursor::<(Point, PointUtf16)>();
266 cursor.seek(&point, Bias::Left, &());
267 let overshoot = point - cursor.start().0;
268 cursor.start().1
269 + cursor.item().map_or(PointUtf16::zero(), |chunk| {
270 chunk.point_to_point_utf16(overshoot)
271 })
272 }
273
274 pub fn point_to_offset(&self, point: Point) -> usize {
275 if point >= self.summary().lines {
276 return self.summary().len;
277 }
278 let mut cursor = self.chunks.cursor::<(Point, usize)>();
279 cursor.seek(&point, Bias::Left, &());
280 let overshoot = point - cursor.start().0;
281 cursor.start().1
282 + cursor
283 .item()
284 .map_or(0, |chunk| chunk.point_to_offset(overshoot))
285 }
286
287 pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
288 self.point_utf16_to_offset_impl(point, false)
289 }
290
291 pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
292 self.point_utf16_to_offset_impl(point.0, true)
293 }
294
295 fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
296 if point >= self.summary().lines_utf16() {
297 return self.summary().len;
298 }
299 let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
300 cursor.seek(&point, Bias::Left, &());
301 let overshoot = point - cursor.start().0;
302 cursor.start().1
303 + cursor
304 .item()
305 .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clip))
306 }
307
308 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
309 if point.0 >= self.summary().lines_utf16() {
310 return self.summary().lines;
311 }
312 let mut cursor = self.chunks.cursor::<(PointUtf16, Point)>();
313 cursor.seek(&point.0, Bias::Left, &());
314 let overshoot = Unclipped(point.0 - cursor.start().0);
315 cursor.start().1
316 + cursor.item().map_or(Point::zero(), |chunk| {
317 chunk.unclipped_point_utf16_to_point(overshoot)
318 })
319 }
320
321 pub fn clip_offset(&self, mut offset: usize, bias: Bias) -> usize {
322 let mut cursor = self.chunks.cursor::<usize>();
323 cursor.seek(&offset, Bias::Left, &());
324 if let Some(chunk) = cursor.item() {
325 let mut ix = offset - cursor.start();
326 while !chunk.0.is_char_boundary(ix) {
327 match bias {
328 Bias::Left => {
329 ix -= 1;
330 offset -= 1;
331 }
332 Bias::Right => {
333 ix += 1;
334 offset += 1;
335 }
336 }
337 }
338 offset
339 } else {
340 self.summary().len
341 }
342 }
343
344 pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
345 let mut cursor = self.chunks.cursor::<OffsetUtf16>();
346 cursor.seek(&offset, Bias::Right, &());
347 if let Some(chunk) = cursor.item() {
348 let overshoot = offset - cursor.start();
349 *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
350 } else {
351 self.summary().len_utf16
352 }
353 }
354
355 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
356 let mut cursor = self.chunks.cursor::<Point>();
357 cursor.seek(&point, Bias::Right, &());
358 if let Some(chunk) = cursor.item() {
359 let overshoot = point - cursor.start();
360 *cursor.start() + chunk.clip_point(overshoot, bias)
361 } else {
362 self.summary().lines
363 }
364 }
365
366 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
367 let mut cursor = self.chunks.cursor::<PointUtf16>();
368 cursor.seek(&point.0, Bias::Right, &());
369 if let Some(chunk) = cursor.item() {
370 let overshoot = Unclipped(point.0 - cursor.start());
371 *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
372 } else {
373 self.summary().lines_utf16()
374 }
375 }
376
377 pub fn line_len(&self, row: u32) -> u32 {
378 self.clip_point(Point::new(row, u32::MAX), Bias::Left)
379 .column
380 }
381
382 pub fn fingerprint(&self) -> RopeFingerprint {
383 self.chunks.summary().fingerprint
384 }
385}
386
387impl<'a> From<&'a str> for Rope {
388 fn from(text: &'a str) -> Self {
389 let mut rope = Self::new();
390 rope.push(text);
391 rope
392 }
393}
394
395impl<'a> FromIterator<&'a str> for Rope {
396 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
397 let mut rope = Rope::new();
398 for chunk in iter {
399 rope.push(chunk);
400 }
401 rope
402 }
403}
404
405impl From<String> for Rope {
406 fn from(text: String) -> Self {
407 Rope::from(text.as_str())
408 }
409}
410
411impl fmt::Display for Rope {
412 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
413 for chunk in self.chunks() {
414 write!(f, "{}", chunk)?;
415 }
416 Ok(())
417 }
418}
419
420impl fmt::Debug for Rope {
421 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
422 use std::fmt::Write as _;
423
424 write!(f, "\"")?;
425 let mut format_string = String::new();
426 for chunk in self.chunks() {
427 write!(&mut format_string, "{:?}", chunk)?;
428 write!(f, "{}", &format_string[1..format_string.len() - 1])?;
429 format_string.clear();
430 }
431 write!(f, "\"")?;
432 Ok(())
433 }
434}
435
436pub struct Cursor<'a> {
437 rope: &'a Rope,
438 chunks: sum_tree::Cursor<'a, Chunk, usize>,
439 offset: usize,
440}
441
442impl<'a> Cursor<'a> {
443 pub fn new(rope: &'a Rope, offset: usize) -> Self {
444 let mut chunks = rope.chunks.cursor();
445 chunks.seek(&offset, Bias::Right, &());
446 Self {
447 rope,
448 chunks,
449 offset,
450 }
451 }
452
453 pub fn seek_forward(&mut self, end_offset: usize) {
454 debug_assert!(end_offset >= self.offset);
455
456 self.chunks.seek_forward(&end_offset, Bias::Right, &());
457 self.offset = end_offset;
458 }
459
460 pub fn slice(&mut self, end_offset: usize) -> Rope {
461 debug_assert!(
462 end_offset >= self.offset,
463 "cannot slice backwards from {} to {}",
464 self.offset,
465 end_offset
466 );
467
468 let mut slice = Rope::new();
469 if let Some(start_chunk) = self.chunks.item() {
470 let start_ix = self.offset - self.chunks.start();
471 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
472 slice.push(&start_chunk.0[start_ix..end_ix]);
473 }
474
475 if end_offset > self.chunks.end(&()) {
476 self.chunks.next(&());
477 slice.append(Rope {
478 chunks: self.chunks.slice(&end_offset, Bias::Right, &()),
479 });
480 if let Some(end_chunk) = self.chunks.item() {
481 let end_ix = end_offset - self.chunks.start();
482 slice.push(&end_chunk.0[..end_ix]);
483 }
484 }
485
486 self.offset = end_offset;
487 slice
488 }
489
490 pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
491 debug_assert!(end_offset >= self.offset);
492
493 let mut summary = D::default();
494 if let Some(start_chunk) = self.chunks.item() {
495 let start_ix = self.offset - self.chunks.start();
496 let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
497 summary.add_assign(&D::from_text_summary(&TextSummary::from(
498 &start_chunk.0[start_ix..end_ix],
499 )));
500 }
501
502 if end_offset > self.chunks.end(&()) {
503 self.chunks.next(&());
504 summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
505 if let Some(end_chunk) = self.chunks.item() {
506 let end_ix = end_offset - self.chunks.start();
507 summary.add_assign(&D::from_text_summary(&TextSummary::from(
508 &end_chunk.0[..end_ix],
509 )));
510 }
511 }
512
513 self.offset = end_offset;
514 summary
515 }
516
517 pub fn suffix(mut self) -> Rope {
518 self.slice(self.rope.chunks.extent(&()))
519 }
520
521 pub fn offset(&self) -> usize {
522 self.offset
523 }
524}
525
526pub struct Chunks<'a> {
527 chunks: sum_tree::Cursor<'a, Chunk, usize>,
528 range: Range<usize>,
529 reversed: bool,
530}
531
532impl<'a> Chunks<'a> {
533 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
534 let mut chunks = rope.chunks.cursor();
535 if reversed {
536 chunks.seek(&range.end, Bias::Left, &());
537 } else {
538 chunks.seek(&range.start, Bias::Right, &());
539 }
540 Self {
541 chunks,
542 range,
543 reversed,
544 }
545 }
546
547 pub fn offset(&self) -> usize {
548 if self.reversed {
549 self.range.end.min(self.chunks.end(&()))
550 } else {
551 self.range.start.max(*self.chunks.start())
552 }
553 }
554
555 pub fn seek(&mut self, offset: usize) {
556 let bias = if self.reversed {
557 Bias::Left
558 } else {
559 Bias::Right
560 };
561
562 if offset >= self.chunks.end(&()) {
563 self.chunks.seek_forward(&offset, bias, &());
564 } else {
565 self.chunks.seek(&offset, bias, &());
566 }
567
568 if self.reversed {
569 self.range.end = offset;
570 } else {
571 self.range.start = offset;
572 }
573 }
574
575 pub fn peek(&self) -> Option<&'a str> {
576 let chunk = self.chunks.item()?;
577 if self.reversed && self.range.start >= self.chunks.end(&()) {
578 return None;
579 }
580 let chunk_start = *self.chunks.start();
581 if self.range.end <= chunk_start {
582 return None;
583 }
584
585 let start = self.range.start.saturating_sub(chunk_start);
586 let end = self.range.end - chunk_start;
587 Some(&chunk.0[start..chunk.0.len().min(end)])
588 }
589}
590
591impl<'a> Iterator for Chunks<'a> {
592 type Item = &'a str;
593
594 fn next(&mut self) -> Option<Self::Item> {
595 let result = self.peek();
596 if result.is_some() {
597 if self.reversed {
598 self.chunks.prev(&());
599 } else {
600 self.chunks.next(&());
601 }
602 }
603 result
604 }
605}
606
607pub struct Bytes<'a> {
608 chunks: sum_tree::Cursor<'a, Chunk, usize>,
609 range: Range<usize>,
610 reversed: bool,
611}
612
613impl<'a> Bytes<'a> {
614 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
615 let mut chunks = rope.chunks.cursor();
616 if reversed {
617 chunks.seek(&range.end, Bias::Left, &());
618 } else {
619 chunks.seek(&range.start, Bias::Right, &());
620 }
621 Self {
622 chunks,
623 range,
624 reversed,
625 }
626 }
627
628 pub fn peek(&self) -> Option<&'a [u8]> {
629 let chunk = self.chunks.item()?;
630 if self.reversed && self.range.start >= self.chunks.end(&()) {
631 return None;
632 }
633 let chunk_start = *self.chunks.start();
634 if self.range.end <= chunk_start {
635 return None;
636 }
637 let start = self.range.start.saturating_sub(chunk_start);
638 let end = self.range.end - chunk_start;
639 Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
640 }
641}
642
643impl<'a> Iterator for Bytes<'a> {
644 type Item = &'a [u8];
645
646 fn next(&mut self) -> Option<Self::Item> {
647 let result = self.peek();
648 if result.is_some() {
649 if self.reversed {
650 self.chunks.prev(&());
651 } else {
652 self.chunks.next(&());
653 }
654 }
655 result
656 }
657}
658
659impl<'a> io::Read for Bytes<'a> {
660 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
661 if let Some(chunk) = self.peek() {
662 let len = cmp::min(buf.len(), chunk.len());
663 if self.reversed {
664 buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
665 buf[..len].reverse();
666 self.range.end -= len;
667 } else {
668 buf[..len].copy_from_slice(&chunk[..len]);
669 self.range.start += len;
670 }
671
672 if len == chunk.len() {
673 if self.reversed {
674 self.chunks.prev(&());
675 } else {
676 self.chunks.next(&());
677 }
678 }
679 Ok(len)
680 } else {
681 Ok(0)
682 }
683 }
684}
685
686#[derive(Clone, Debug, Default)]
687struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
688
689impl Chunk {
690 fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
691 let mut offset = 0;
692 let mut offset_utf16 = OffsetUtf16(0);
693 for ch in self.0.chars() {
694 if offset >= target {
695 break;
696 }
697
698 offset += ch.len_utf8();
699 offset_utf16.0 += ch.len_utf16();
700 }
701 offset_utf16
702 }
703
704 fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
705 let mut offset_utf16 = OffsetUtf16(0);
706 let mut offset = 0;
707 for ch in self.0.chars() {
708 if offset_utf16 >= target {
709 break;
710 }
711
712 offset += ch.len_utf8();
713 offset_utf16.0 += ch.len_utf16();
714 }
715 offset
716 }
717
718 fn offset_to_point(&self, target: usize) -> Point {
719 let mut offset = 0;
720 let mut point = Point::new(0, 0);
721 for ch in self.0.chars() {
722 if offset >= target {
723 break;
724 }
725
726 if ch == '\n' {
727 point.row += 1;
728 point.column = 0;
729 } else {
730 point.column += ch.len_utf8() as u32;
731 }
732 offset += ch.len_utf8();
733 }
734 point
735 }
736
737 fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
738 let mut offset = 0;
739 let mut point = PointUtf16::new(0, 0);
740 for ch in self.0.chars() {
741 if offset >= target {
742 break;
743 }
744
745 if ch == '\n' {
746 point.row += 1;
747 point.column = 0;
748 } else {
749 point.column += ch.len_utf16() as u32;
750 }
751 offset += ch.len_utf8();
752 }
753 point
754 }
755
756 fn point_to_offset(&self, target: Point) -> usize {
757 let mut offset = 0;
758 let mut point = Point::new(0, 0);
759
760 for ch in self.0.chars() {
761 if point >= target {
762 if point > target {
763 debug_panic!("point {target:?} is inside of character {ch:?}");
764 }
765 break;
766 }
767
768 if ch == '\n' {
769 point.row += 1;
770 point.column = 0;
771
772 if point.row > target.row {
773 debug_panic!(
774 "point {target:?} is beyond the end of a line with length {}",
775 point.column
776 );
777 break;
778 }
779 } else {
780 point.column += ch.len_utf8() as u32;
781 }
782
783 offset += ch.len_utf8();
784 }
785
786 offset
787 }
788
789 fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
790 let mut point = Point::zero();
791 let mut point_utf16 = PointUtf16::new(0, 0);
792 for ch in self.0.chars() {
793 if point >= target {
794 break;
795 }
796
797 if ch == '\n' {
798 point_utf16.row += 1;
799 point_utf16.column = 0;
800 point.row += 1;
801 point.column = 0;
802 } else {
803 point_utf16.column += ch.len_utf16() as u32;
804 point.column += ch.len_utf8() as u32;
805 }
806 }
807 point_utf16
808 }
809
810 fn point_utf16_to_offset(&self, target: PointUtf16, clip: bool) -> usize {
811 let mut offset = 0;
812 let mut point = PointUtf16::new(0, 0);
813
814 for ch in self.0.chars() {
815 if point == target {
816 break;
817 }
818
819 if ch == '\n' {
820 point.row += 1;
821 point.column = 0;
822
823 if point.row > target.row {
824 if !clip {
825 debug_panic!(
826 "point {target:?} is beyond the end of a line with length {}",
827 point.column
828 );
829 }
830 // Return the offset of the newline
831 return offset;
832 }
833 } else {
834 point.column += ch.len_utf16() as u32;
835 }
836
837 if point > target {
838 if !clip {
839 debug_panic!("point {target:?} is inside of codepoint {ch:?}");
840 }
841 // Return the offset of the codepoint which we have landed within, bias left
842 return offset;
843 }
844
845 offset += ch.len_utf8();
846 }
847
848 offset
849 }
850
851 fn unclipped_point_utf16_to_point(&self, target: Unclipped<PointUtf16>) -> Point {
852 let mut point = Point::zero();
853 let mut point_utf16 = PointUtf16::zero();
854
855 for ch in self.0.chars() {
856 if point_utf16 == target.0 {
857 break;
858 }
859
860 if point_utf16 > target.0 {
861 // If the point is past the end of a line or inside of a code point,
862 // return the last valid point before the target.
863 return point;
864 }
865
866 if ch == '\n' {
867 point_utf16 += PointUtf16::new(1, 0);
868 point += Point::new(1, 0);
869 } else {
870 point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
871 point += Point::new(0, ch.len_utf8() as u32);
872 }
873 }
874
875 point
876 }
877
878 fn clip_point(&self, target: Point, bias: Bias) -> Point {
879 for (row, line) in self.0.split('\n').enumerate() {
880 if row == target.row as usize {
881 let mut column = target.column.min(line.len() as u32);
882 while !line.is_char_boundary(column as usize) {
883 match bias {
884 Bias::Left => column -= 1,
885 Bias::Right => column += 1,
886 }
887 }
888 return Point::new(row as u32, column);
889 }
890 }
891 unreachable!()
892 }
893
894 fn clip_point_utf16(&self, target: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
895 for (row, line) in self.0.split('\n').enumerate() {
896 if row == target.0.row as usize {
897 let mut code_units = line.encode_utf16();
898 let mut column = code_units.by_ref().take(target.0.column as usize).count();
899 if char::decode_utf16(code_units).next().transpose().is_err() {
900 match bias {
901 Bias::Left => column -= 1,
902 Bias::Right => column += 1,
903 }
904 }
905 return PointUtf16::new(row as u32, column as u32);
906 }
907 }
908 unreachable!()
909 }
910
911 fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
912 let mut code_units = self.0.encode_utf16();
913 let mut offset = code_units.by_ref().take(target.0).count();
914 if char::decode_utf16(code_units).next().transpose().is_err() {
915 match bias {
916 Bias::Left => offset -= 1,
917 Bias::Right => offset += 1,
918 }
919 }
920 OffsetUtf16(offset)
921 }
922}
923
924impl sum_tree::Item for Chunk {
925 type Summary = ChunkSummary;
926
927 fn summary(&self) -> Self::Summary {
928 ChunkSummary::from(self.0.as_str())
929 }
930}
931
932#[derive(Clone, Debug, Default, Eq, PartialEq)]
933pub struct ChunkSummary {
934 text: TextSummary,
935 fingerprint: RopeFingerprint,
936}
937
938impl<'a> From<&'a str> for ChunkSummary {
939 fn from(text: &'a str) -> Self {
940 Self {
941 text: TextSummary::from(text),
942 fingerprint: Rope::text_fingerprint(text),
943 }
944 }
945}
946
947impl sum_tree::Summary for ChunkSummary {
948 type Context = ();
949
950 fn add_summary(&mut self, summary: &Self, _: &()) {
951 self.text += &summary.text;
952 self.fingerprint = self.fingerprint * summary.fingerprint;
953 }
954}
955
956/// Summary of a string of text.
957#[derive(Clone, Debug, Default, Eq, PartialEq)]
958pub struct TextSummary {
959 /// Length in UTF-8
960 pub len: usize,
961 /// Length in UTF-16 code units
962 pub len_utf16: OffsetUtf16,
963 /// A point representing the number of lines and the length of the last line
964 pub lines: Point,
965 /// How many `char`s are in the first line
966 pub first_line_chars: u32,
967 /// How many `char`s are in the last line
968 pub last_line_chars: u32,
969 /// How many UTF-16 code units are in the last line
970 pub last_line_len_utf16: u32,
971 /// The row idx of the longest row
972 pub longest_row: u32,
973 /// How many `char`s are in the longest row
974 pub longest_row_chars: u32,
975}
976
977impl TextSummary {
978 pub fn lines_utf16(&self) -> PointUtf16 {
979 PointUtf16 {
980 row: self.lines.row,
981 column: self.last_line_len_utf16,
982 }
983 }
984}
985
986impl<'a> From<&'a str> for TextSummary {
987 fn from(text: &'a str) -> Self {
988 let mut len_utf16 = OffsetUtf16(0);
989 let mut lines = Point::new(0, 0);
990 let mut first_line_chars = 0;
991 let mut last_line_chars = 0;
992 let mut last_line_len_utf16 = 0;
993 let mut longest_row = 0;
994 let mut longest_row_chars = 0;
995 for c in text.chars() {
996 len_utf16.0 += c.len_utf16();
997
998 if c == '\n' {
999 lines += Point::new(1, 0);
1000 last_line_len_utf16 = 0;
1001 last_line_chars = 0;
1002 } else {
1003 lines.column += c.len_utf8() as u32;
1004 last_line_len_utf16 += c.len_utf16() as u32;
1005 last_line_chars += 1;
1006 }
1007
1008 if lines.row == 0 {
1009 first_line_chars = last_line_chars;
1010 }
1011
1012 if last_line_chars > longest_row_chars {
1013 longest_row = lines.row;
1014 longest_row_chars = last_line_chars;
1015 }
1016 }
1017
1018 TextSummary {
1019 len: text.len(),
1020 len_utf16,
1021 lines,
1022 first_line_chars,
1023 last_line_chars,
1024 last_line_len_utf16,
1025 longest_row,
1026 longest_row_chars,
1027 }
1028 }
1029}
1030
1031impl sum_tree::Summary for TextSummary {
1032 type Context = ();
1033
1034 fn add_summary(&mut self, summary: &Self, _: &Self::Context) {
1035 *self += summary;
1036 }
1037}
1038
1039impl std::ops::Add<Self> for TextSummary {
1040 type Output = Self;
1041
1042 fn add(mut self, rhs: Self) -> Self::Output {
1043 AddAssign::add_assign(&mut self, &rhs);
1044 self
1045 }
1046}
1047
1048impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
1049 fn add_assign(&mut self, other: &'a Self) {
1050 let joined_chars = self.last_line_chars + other.first_line_chars;
1051 if joined_chars > self.longest_row_chars {
1052 self.longest_row = self.lines.row;
1053 self.longest_row_chars = joined_chars;
1054 }
1055 if other.longest_row_chars > self.longest_row_chars {
1056 self.longest_row = self.lines.row + other.longest_row;
1057 self.longest_row_chars = other.longest_row_chars;
1058 }
1059
1060 if self.lines.row == 0 {
1061 self.first_line_chars += other.first_line_chars;
1062 }
1063
1064 if other.lines.row == 0 {
1065 self.last_line_chars += other.first_line_chars;
1066 self.last_line_len_utf16 += other.last_line_len_utf16;
1067 } else {
1068 self.last_line_chars = other.last_line_chars;
1069 self.last_line_len_utf16 = other.last_line_len_utf16;
1070 }
1071
1072 self.len += other.len;
1073 self.len_utf16 += other.len_utf16;
1074 self.lines += other.lines;
1075 }
1076}
1077
1078impl std::ops::AddAssign<Self> for TextSummary {
1079 fn add_assign(&mut self, other: Self) {
1080 *self += &other;
1081 }
1082}
1083
1084pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
1085 fn from_text_summary(summary: &TextSummary) -> Self;
1086 fn add_assign(&mut self, other: &Self);
1087}
1088
1089impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
1090 fn from_text_summary(summary: &TextSummary) -> Self {
1091 (
1092 D1::from_text_summary(summary),
1093 D2::from_text_summary(summary),
1094 )
1095 }
1096
1097 fn add_assign(&mut self, other: &Self) {
1098 self.0.add_assign(&other.0);
1099 self.1.add_assign(&other.1);
1100 }
1101}
1102
1103impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1104 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1105 *self += &summary.text;
1106 }
1107}
1108
1109impl TextDimension for TextSummary {
1110 fn from_text_summary(summary: &TextSummary) -> Self {
1111 summary.clone()
1112 }
1113
1114 fn add_assign(&mut self, other: &Self) {
1115 *self += other;
1116 }
1117}
1118
1119impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1120 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1121 *self += summary.text.len;
1122 }
1123}
1124
1125impl TextDimension for usize {
1126 fn from_text_summary(summary: &TextSummary) -> Self {
1127 summary.len
1128 }
1129
1130 fn add_assign(&mut self, other: &Self) {
1131 *self += other;
1132 }
1133}
1134
1135impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1136 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1137 *self += summary.text.len_utf16;
1138 }
1139}
1140
1141impl TextDimension for OffsetUtf16 {
1142 fn from_text_summary(summary: &TextSummary) -> Self {
1143 summary.len_utf16
1144 }
1145
1146 fn add_assign(&mut self, other: &Self) {
1147 *self += other;
1148 }
1149}
1150
1151impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1152 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1153 *self += summary.text.lines;
1154 }
1155}
1156
1157impl TextDimension for Point {
1158 fn from_text_summary(summary: &TextSummary) -> Self {
1159 summary.lines
1160 }
1161
1162 fn add_assign(&mut self, other: &Self) {
1163 *self += other;
1164 }
1165}
1166
1167impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1168 fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
1169 *self += summary.text.lines_utf16();
1170 }
1171}
1172
1173impl TextDimension for PointUtf16 {
1174 fn from_text_summary(summary: &TextSummary) -> Self {
1175 summary.lines_utf16()
1176 }
1177
1178 fn add_assign(&mut self, other: &Self) {
1179 *self += other;
1180 }
1181}
1182
1183#[cfg(test)]
1184mod tests {
1185 use super::*;
1186 use rand::prelude::*;
1187 use std::{cmp::Ordering, env, io::Read};
1188 use util::RandomCharIter;
1189 use Bias::{Left, Right};
1190
1191 #[test]
1192 fn test_all_4_byte_chars() {
1193 let mut rope = Rope::new();
1194 let text = "🏀".repeat(256);
1195 rope.push(&text);
1196 assert_eq!(rope.text(), text);
1197 }
1198
1199 #[test]
1200 fn test_clip() {
1201 let rope = Rope::from("🧘");
1202
1203 assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1204 assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1205 assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1206
1207 assert_eq!(
1208 rope.clip_point(Point::new(0, 1), Bias::Left),
1209 Point::new(0, 0)
1210 );
1211 assert_eq!(
1212 rope.clip_point(Point::new(0, 1), Bias::Right),
1213 Point::new(0, 4)
1214 );
1215 assert_eq!(
1216 rope.clip_point(Point::new(0, 5), Bias::Right),
1217 Point::new(0, 4)
1218 );
1219
1220 assert_eq!(
1221 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1222 PointUtf16::new(0, 0)
1223 );
1224 assert_eq!(
1225 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1226 PointUtf16::new(0, 2)
1227 );
1228 assert_eq!(
1229 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1230 PointUtf16::new(0, 2)
1231 );
1232
1233 assert_eq!(
1234 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1235 OffsetUtf16(0)
1236 );
1237 assert_eq!(
1238 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1239 OffsetUtf16(2)
1240 );
1241 assert_eq!(
1242 rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1243 OffsetUtf16(2)
1244 );
1245 }
1246
1247 #[gpui::test(iterations = 100)]
1248 fn test_random_rope(mut rng: StdRng) {
1249 let operations = env::var("OPERATIONS")
1250 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1251 .unwrap_or(10);
1252
1253 let mut expected = String::new();
1254 let mut actual = Rope::new();
1255 for _ in 0..operations {
1256 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1257 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1258 let len = rng.gen_range(0..=64);
1259 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1260
1261 let mut new_actual = Rope::new();
1262 let mut cursor = actual.cursor(0);
1263 new_actual.append(cursor.slice(start_ix));
1264 new_actual.push(&new_text);
1265 cursor.seek_forward(end_ix);
1266 new_actual.append(cursor.suffix());
1267 actual = new_actual;
1268
1269 expected.replace_range(start_ix..end_ix, &new_text);
1270
1271 assert_eq!(actual.text(), expected);
1272 log::info!("text: {:?}", expected);
1273
1274 for _ in 0..5 {
1275 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1276 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1277
1278 let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1279 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1280
1281 let mut actual_text = String::new();
1282 actual
1283 .bytes_in_range(start_ix..end_ix)
1284 .read_to_string(&mut actual_text)
1285 .unwrap();
1286 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1287
1288 assert_eq!(
1289 actual
1290 .reversed_chunks_in_range(start_ix..end_ix)
1291 .collect::<Vec<&str>>()
1292 .into_iter()
1293 .rev()
1294 .collect::<String>(),
1295 &expected[start_ix..end_ix]
1296 );
1297 }
1298
1299 let mut offset_utf16 = OffsetUtf16(0);
1300 let mut point = Point::new(0, 0);
1301 let mut point_utf16 = PointUtf16::new(0, 0);
1302 for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
1303 assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
1304 assert_eq!(
1305 actual.offset_to_point_utf16(ix),
1306 point_utf16,
1307 "offset_to_point_utf16({})",
1308 ix
1309 );
1310 assert_eq!(
1311 actual.point_to_offset(point),
1312 ix,
1313 "point_to_offset({:?})",
1314 point
1315 );
1316 assert_eq!(
1317 actual.point_utf16_to_offset(point_utf16),
1318 ix,
1319 "point_utf16_to_offset({:?})",
1320 point_utf16
1321 );
1322 assert_eq!(
1323 actual.offset_to_offset_utf16(ix),
1324 offset_utf16,
1325 "offset_to_offset_utf16({:?})",
1326 ix
1327 );
1328 assert_eq!(
1329 actual.offset_utf16_to_offset(offset_utf16),
1330 ix,
1331 "offset_utf16_to_offset({:?})",
1332 offset_utf16
1333 );
1334 if ch == '\n' {
1335 point += Point::new(1, 0);
1336 point_utf16 += PointUtf16::new(1, 0);
1337 } else {
1338 point.column += ch.len_utf8() as u32;
1339 point_utf16.column += ch.len_utf16() as u32;
1340 }
1341 offset_utf16.0 += ch.len_utf16();
1342 }
1343
1344 let mut offset_utf16 = OffsetUtf16(0);
1345 let mut point_utf16 = Unclipped(PointUtf16::zero());
1346 for unit in expected.encode_utf16() {
1347 let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
1348 let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
1349 assert!(right_offset >= left_offset);
1350 // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
1351 actual.offset_utf16_to_offset(left_offset);
1352 actual.offset_utf16_to_offset(right_offset);
1353
1354 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
1355 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
1356 assert!(right_point >= left_point);
1357 // Ensure translating valid UTF-16 points to offsets doesn't panic.
1358 actual.point_utf16_to_offset(left_point);
1359 actual.point_utf16_to_offset(right_point);
1360
1361 offset_utf16.0 += 1;
1362 if unit == b'\n' as u16 {
1363 point_utf16.0 += PointUtf16::new(1, 0);
1364 } else {
1365 point_utf16.0 += PointUtf16::new(0, 1);
1366 }
1367 }
1368
1369 for _ in 0..5 {
1370 let end_ix = clip_offset(&expected, rng.gen_range(0..=expected.len()), Right);
1371 let start_ix = clip_offset(&expected, rng.gen_range(0..=end_ix), Left);
1372 assert_eq!(
1373 actual.cursor(start_ix).summary::<TextSummary>(end_ix),
1374 TextSummary::from(&expected[start_ix..end_ix])
1375 );
1376 }
1377
1378 let mut expected_longest_rows = Vec::new();
1379 let mut longest_line_len = -1_isize;
1380 for (row, line) in expected.split('\n').enumerate() {
1381 let row = row as u32;
1382 assert_eq!(
1383 actual.line_len(row),
1384 line.len() as u32,
1385 "invalid line len for row {}",
1386 row
1387 );
1388
1389 let line_char_count = line.chars().count() as isize;
1390 match line_char_count.cmp(&longest_line_len) {
1391 Ordering::Less => {}
1392 Ordering::Equal => expected_longest_rows.push(row),
1393 Ordering::Greater => {
1394 longest_line_len = line_char_count;
1395 expected_longest_rows.clear();
1396 expected_longest_rows.push(row);
1397 }
1398 }
1399 }
1400
1401 let longest_row = actual.summary().longest_row;
1402 assert!(
1403 expected_longest_rows.contains(&longest_row),
1404 "incorrect longest row {}. expected {:?} with length {}",
1405 longest_row,
1406 expected_longest_rows,
1407 longest_line_len,
1408 );
1409 }
1410 }
1411
1412 fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
1413 while !text.is_char_boundary(offset) {
1414 match bias {
1415 Bias::Left => offset -= 1,
1416 Bias::Right => offset += 1,
1417 }
1418 }
1419 offset
1420 }
1421
1422 impl Rope {
1423 fn text(&self) -> String {
1424 let mut text = String::new();
1425 for chunk in self.chunks.cursor::<()>() {
1426 text.push_str(&chunk.0);
1427 }
1428 text
1429 }
1430 }
1431}