1mod chunk;
2mod offset_utf16;
3mod point;
4mod point_utf16;
5mod unclipped;
6
7use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
8use regex::Regex;
9use smallvec::SmallVec;
10use std::{
11 borrow::Cow,
12 cmp, fmt, io, mem,
13 ops::{self, AddAssign, Range},
14 str,
15 sync::{Arc, LazyLock},
16};
17use sum_tree::{Bias, Dimension, Dimensions, SumTree};
18
19pub use chunk::{Chunk, ChunkSlice};
20pub use offset_utf16::OffsetUtf16;
21pub use point::Point;
22pub use point_utf16::PointUtf16;
23pub use unclipped::Unclipped;
24
25use crate::chunk::Bitmap;
26
27static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
29
30#[derive(Clone, Copy, Debug, PartialEq)]
31pub enum LineEnding {
32 Unix,
33 Windows,
34}
35
36impl Default for LineEnding {
37 fn default() -> Self {
38 #[cfg(unix)]
39 return Self::Unix;
40
41 #[cfg(not(unix))]
42 return Self::Windows;
43 }
44}
45
46impl LineEnding {
47 pub fn as_str(&self) -> &'static str {
48 match self {
49 LineEnding::Unix => "\n",
50 LineEnding::Windows => "\r\n",
51 }
52 }
53
54 pub fn label(&self) -> &'static str {
55 match self {
56 LineEnding::Unix => "LF",
57 LineEnding::Windows => "CRLF",
58 }
59 }
60
61 pub fn detect(text: &str) -> Self {
62 let mut max_ix = cmp::min(text.len(), 1000);
63 while !text.is_char_boundary(max_ix) {
64 max_ix -= 1;
65 }
66
67 if let Some(ix) = text[..max_ix].find(['\n']) {
68 if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
69 Self::Windows
70 } else {
71 Self::Unix
72 }
73 } else {
74 Self::default()
75 }
76 }
77
78 pub fn normalize(text: &mut String) {
79 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
80 *text = replaced;
81 }
82 }
83
84 pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
85 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
86 replaced.into()
87 } else {
88 text
89 }
90 }
91
92 pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
93 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
94 replaced.into()
95 } else {
96 text
97 }
98 }
99
100 /// Converts text chunks into a [`String`] using the current line ending.
101 pub fn into_string(&self, chunks: Chunks<'_>) -> String {
102 match self {
103 LineEnding::Unix => chunks.collect(),
104 LineEnding::Windows => {
105 let line_ending = self.as_str();
106 let mut result = String::new();
107 for chunk in chunks {
108 result.push_str(&chunk.replace('\n', line_ending));
109 }
110 result
111 }
112 }
113 }
114}
115
116#[derive(Clone, Default)]
117pub struct Rope {
118 chunks: SumTree<Chunk>,
119}
120
121impl Rope {
122 pub fn new() -> Self {
123 Self::default()
124 }
125
126 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
127 /// sequence or the end of the string.
128 ///
129 /// The start and end of the string (when `index == self.len()`) are
130 /// considered to be boundaries.
131 ///
132 /// Returns `false` if `index` is greater than `self.len()`.
133 pub fn is_char_boundary(&self, offset: usize) -> bool {
134 if self.chunks.is_empty() {
135 return offset == 0;
136 }
137 let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
138 let chunk_offset = offset - start;
139 item.map(|chunk| chunk.is_char_boundary(chunk_offset))
140 .unwrap_or(false)
141 }
142
143 #[track_caller]
144 #[inline(always)]
145 pub fn assert_char_boundary(&self, offset: usize) {
146 if self.chunks.is_empty() && offset == 0 {
147 return;
148 }
149 let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
150 match item {
151 Some(chunk) => {
152 let chunk_offset = offset - start;
153 chunk.assert_char_boundary(chunk_offset);
154 }
155 None => {
156 panic!(
157 "byte index {} is out of bounds of rope (length: {})",
158 offset,
159 self.len()
160 );
161 }
162 }
163 }
164
165 pub fn floor_char_boundary(&self, index: usize) -> usize {
166 if index >= self.len() {
167 self.len()
168 } else {
169 #[inline]
170 pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
171 // This is bit magic equivalent to: b < 128 || b >= 192
172 (u8 as i8) >= -0x40
173 }
174
175 let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
176 let chunk_offset = index - start;
177 let lower_idx = item.map(|chunk| {
178 let lower_bound = chunk_offset.saturating_sub(3);
179 chunk
180 .text
181 .as_bytes()
182 .get(lower_bound..=chunk_offset)
183 .map(|it| {
184 let new_idx = it
185 .iter()
186 .rposition(|&b| is_utf8_char_boundary(b))
187 .unwrap_or(0);
188 lower_bound + new_idx
189 })
190 .unwrap_or(chunk.text.len())
191 });
192 lower_idx.map_or_else(|| self.len(), |idx| start + idx)
193 }
194 }
195
196 pub fn ceil_char_boundary(&self, index: usize) -> usize {
197 if index > self.len() {
198 self.len()
199 } else {
200 #[inline]
201 pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
202 // This is bit magic equivalent to: b < 128 || b >= 192
203 (u8 as i8) >= -0x40
204 }
205
206 let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
207 let chunk_offset = index - start;
208 let upper_idx = item.map(|chunk| {
209 let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
210 chunk.text.as_bytes()[chunk_offset..upper_bound]
211 .iter()
212 .position(|&b| is_utf8_char_boundary(b))
213 .map_or(upper_bound, |pos| pos + chunk_offset)
214 });
215
216 upper_idx.map_or_else(|| self.len(), |idx| start + idx)
217 }
218 }
219
220 pub fn append(&mut self, rope: Rope) {
221 if let Some(chunk) = rope.chunks.first()
222 && (self
223 .chunks
224 .last()
225 .is_some_and(|c| c.text.len() < chunk::MIN_BASE)
226 || chunk.text.len() < chunk::MIN_BASE)
227 {
228 self.push_chunk(chunk.as_slice());
229
230 let mut chunks = rope.chunks.cursor::<()>(());
231 chunks.next();
232 chunks.next();
233 self.chunks.append(chunks.suffix(), ());
234 } else {
235 self.chunks.append(rope.chunks, ());
236 }
237 self.check_invariants();
238 }
239
240 pub fn replace(&mut self, range: Range<usize>, text: &str) {
241 let mut new_rope = Rope::new();
242 let mut cursor = self.cursor(0);
243 new_rope.append(cursor.slice(range.start));
244 cursor.seek_forward(range.end);
245 new_rope.push(text);
246 new_rope.append(cursor.suffix());
247 *self = new_rope;
248 }
249
250 pub fn slice(&self, range: Range<usize>) -> Rope {
251 let mut cursor = self.cursor(0);
252 cursor.seek_forward(range.start);
253 cursor.slice(range.end)
254 }
255
256 pub fn slice_rows(&self, range: Range<u32>) -> Rope {
257 // This would be more efficient with a forward advance after the first, but it's fine.
258 let start = self.point_to_offset(Point::new(range.start, 0));
259 let end = self.point_to_offset(Point::new(range.end, 0));
260 self.slice(start..end)
261 }
262
263 pub fn push(&mut self, mut text: &str) {
264 self.chunks.update_last(
265 |last_chunk| {
266 let split_ix = if last_chunk.text.len() + text.len() <= chunk::MAX_BASE {
267 text.len()
268 } else {
269 let mut split_ix = cmp::min(
270 chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
271 text.len(),
272 );
273 while !text.is_char_boundary(split_ix) {
274 split_ix += 1;
275 }
276 split_ix
277 };
278
279 let (suffix, remainder) = text.split_at(split_ix);
280 last_chunk.push_str(suffix);
281 text = remainder;
282 },
283 (),
284 );
285
286 if text.len() > 2048 {
287 return self.push_large(text);
288 }
289 let mut new_chunks = SmallVec::<[_; 16]>::new();
290
291 while !text.is_empty() {
292 let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
293 while !text.is_char_boundary(split_ix) {
294 split_ix -= 1;
295 }
296 let (chunk, remainder) = text.split_at(split_ix);
297 new_chunks.push(chunk);
298 text = remainder;
299 }
300
301 #[cfg(test)]
302 const PARALLEL_THRESHOLD: usize = 4;
303 #[cfg(not(test))]
304 const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
305
306 if new_chunks.len() >= PARALLEL_THRESHOLD {
307 self.chunks
308 .par_extend(new_chunks.into_vec().into_par_iter().map(Chunk::new), ());
309 } else {
310 self.chunks
311 .extend(new_chunks.into_iter().map(Chunk::new), ());
312 }
313
314 self.check_invariants();
315 }
316
317 /// A copy of `push` specialized for working with large quantities of text.
318 fn push_large(&mut self, mut text: &str) {
319 // To avoid frequent reallocs when loading large swaths of file contents,
320 // we estimate worst-case `new_chunks` capacity;
321 // Chunk is a fixed-capacity buffer. If a character falls on
322 // chunk boundary, we push it off to the following chunk (thus leaving a small bit of capacity unfilled in current chunk).
323 // Worst-case chunk count when loading a file is then a case where every chunk ends up with that unused capacity.
324 // Since we're working with UTF-8, each character is at most 4 bytes wide. It follows then that the worst case is where
325 // a chunk ends with 3 bytes of a 4-byte character. These 3 bytes end up being stored in the following chunk, thus wasting
326 // 3 bytes of storage in current chunk.
327 // For example, a 1024-byte string can occupy between 32 (full ASCII, 1024/32) and 36 (full 4-byte UTF-8, 1024 / 29 rounded up) chunks.
328 const MIN_CHUNK_SIZE: usize = chunk::MAX_BASE - 3;
329
330 // We also round up the capacity up by one, for a good measure; we *really* don't want to realloc here, as we assume that the # of characters
331 // we're working with there is large.
332 let capacity = text.len().div_ceil(MIN_CHUNK_SIZE);
333 let mut new_chunks = Vec::with_capacity(capacity);
334
335 while !text.is_empty() {
336 let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
337 while !text.is_char_boundary(split_ix) {
338 split_ix -= 1;
339 }
340 let (chunk, remainder) = text.split_at(split_ix);
341 new_chunks.push(chunk);
342 text = remainder;
343 }
344
345 #[cfg(test)]
346 const PARALLEL_THRESHOLD: usize = 4;
347 #[cfg(not(test))]
348 const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
349
350 if new_chunks.len() >= PARALLEL_THRESHOLD {
351 self.chunks
352 .par_extend(new_chunks.into_par_iter().map(Chunk::new), ());
353 } else {
354 self.chunks
355 .extend(new_chunks.into_iter().map(Chunk::new), ());
356 }
357
358 self.check_invariants();
359 }
360
361 fn push_chunk(&mut self, mut chunk: ChunkSlice) {
362 self.chunks.update_last(
363 |last_chunk| {
364 let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
365 chunk.len()
366 } else {
367 let mut split_ix = cmp::min(
368 chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
369 chunk.len(),
370 );
371 while !chunk.is_char_boundary(split_ix) {
372 split_ix += 1;
373 }
374 split_ix
375 };
376
377 let (suffix, remainder) = chunk.split_at(split_ix);
378 last_chunk.append(suffix);
379 chunk = remainder;
380 },
381 (),
382 );
383
384 if !chunk.is_empty() {
385 self.chunks.push(chunk.into(), ());
386 }
387 }
388
389 pub fn push_front(&mut self, text: &str) {
390 let suffix = mem::replace(self, Rope::from(text));
391 self.append(suffix);
392 }
393
394 fn check_invariants(&self) {
395 #[cfg(test)]
396 {
397 // Ensure all chunks except maybe the last one are not underflowing.
398 // Allow some wiggle room for multibyte characters at chunk boundaries.
399 let mut chunks = self.chunks.cursor::<()>(()).peekable();
400 while let Some(chunk) = chunks.next() {
401 if chunks.peek().is_some() {
402 assert!(chunk.text.len() + 3 >= chunk::MIN_BASE);
403 }
404 }
405 }
406 }
407
408 pub fn summary(&self) -> TextSummary {
409 self.chunks.summary().text
410 }
411
412 pub fn len(&self) -> usize {
413 self.chunks.extent(())
414 }
415
416 pub fn is_empty(&self) -> bool {
417 self.len() == 0
418 }
419
420 pub fn max_point(&self) -> Point {
421 self.chunks.extent(())
422 }
423
424 pub fn max_point_utf16(&self) -> PointUtf16 {
425 self.chunks.extent(())
426 }
427
428 pub fn cursor(&self, offset: usize) -> Cursor<'_> {
429 Cursor::new(self, offset)
430 }
431
432 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
433 self.chars_at(0)
434 }
435
436 pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
437 self.chunks_in_range(start..self.len()).flat_map(str::chars)
438 }
439
440 pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
441 self.reversed_chunks_in_range(0..start)
442 .flat_map(|chunk| chunk.chars().rev())
443 }
444
445 pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
446 Bytes::new(self, range, false)
447 }
448
449 pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
450 Bytes::new(self, range, true)
451 }
452
453 pub fn chunks(&self) -> Chunks<'_> {
454 self.chunks_in_range(0..self.len())
455 }
456
457 pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
458 Chunks::new(self, range, false)
459 }
460
461 pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
462 Chunks::new(self, range, true)
463 }
464
465 /// Formats the rope's text with the specified line ending string.
466 /// This replaces all `\n` characters with the provided line ending.
467 ///
468 /// The rope internally stores all line breaks as `\n` (see `Display` impl).
469 /// Use this method to convert to different line endings for file operations,
470 /// LSP communication, or other scenarios requiring specific line ending formats.
471 pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
472 line_ending.into_string(self.chunks())
473 }
474
475 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
476 if offset >= self.summary().len {
477 return self.summary().len_utf16;
478 }
479 let (start, _, item) =
480 self.chunks
481 .find::<Dimensions<usize, OffsetUtf16>, _>((), &offset, Bias::Left);
482 let overshoot = offset - start.0;
483 start.1
484 + item.map_or(Default::default(), |chunk| {
485 chunk.as_slice().offset_to_offset_utf16(overshoot)
486 })
487 }
488
489 pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
490 if offset >= self.summary().len_utf16 {
491 return self.summary().len;
492 }
493 let (start, _, item) =
494 self.chunks
495 .find::<Dimensions<OffsetUtf16, usize>, _>((), &offset, Bias::Left);
496 let overshoot = offset - start.0;
497 start.1
498 + item.map_or(Default::default(), |chunk| {
499 chunk.as_slice().offset_utf16_to_offset(overshoot)
500 })
501 }
502
503 pub fn offset_to_point(&self, offset: usize) -> Point {
504 if offset >= self.summary().len {
505 return self.summary().lines;
506 }
507 let (start, _, item) =
508 self.chunks
509 .find::<Dimensions<usize, Point>, _>((), &offset, Bias::Left);
510 let overshoot = offset - start.0;
511 start.1
512 + item.map_or(Point::zero(), |chunk| {
513 chunk.as_slice().offset_to_point(overshoot)
514 })
515 }
516
517 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
518 if offset >= self.summary().len {
519 return self.summary().lines_utf16();
520 }
521 let (start, _, item) =
522 self.chunks
523 .find::<Dimensions<usize, PointUtf16>, _>((), &offset, Bias::Left);
524 let overshoot = offset - start.0;
525 start.1
526 + item.map_or(PointUtf16::zero(), |chunk| {
527 chunk.as_slice().offset_to_point_utf16(overshoot)
528 })
529 }
530
531 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
532 if point >= self.summary().lines {
533 return self.summary().lines_utf16();
534 }
535 let (start, _, item) =
536 self.chunks
537 .find::<Dimensions<Point, PointUtf16>, _>((), &point, Bias::Left);
538 let overshoot = point - start.0;
539 start.1
540 + item.map_or(PointUtf16::zero(), |chunk| {
541 chunk.as_slice().point_to_point_utf16(overshoot)
542 })
543 }
544
545 pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
546 if point >= self.summary().lines_utf16() {
547 return self.summary().lines;
548 }
549 let mut cursor = self.chunks.cursor::<Dimensions<PointUtf16, Point>>(());
550 cursor.seek(&point, Bias::Left);
551 let overshoot = point - cursor.start().0;
552 cursor.start().1
553 + cursor.item().map_or(Point::zero(), |chunk| {
554 chunk
555 .as_slice()
556 .offset_to_point(chunk.as_slice().point_utf16_to_offset(overshoot, false))
557 })
558 }
559
560 pub fn point_to_offset(&self, point: Point) -> usize {
561 if point >= self.summary().lines {
562 return self.summary().len;
563 }
564 let (start, _, item) =
565 self.chunks
566 .find::<Dimensions<Point, usize>, _>((), &point, Bias::Left);
567 let overshoot = point - start.0;
568 start.1 + item.map_or(0, |chunk| chunk.as_slice().point_to_offset(overshoot))
569 }
570
571 pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
572 if point >= self.summary().lines {
573 return self.summary().len_utf16;
574 }
575 let mut cursor = self.chunks.cursor::<Dimensions<Point, OffsetUtf16>>(());
576 cursor.seek(&point, Bias::Left);
577 let overshoot = point - cursor.start().0;
578 cursor.start().1
579 + cursor.item().map_or(OffsetUtf16(0), |chunk| {
580 chunk.as_slice().point_to_offset_utf16(overshoot)
581 })
582 }
583
584 pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
585 self.point_utf16_to_offset_impl(point, false)
586 }
587
588 pub fn point_utf16_to_offset_utf16(&self, point: PointUtf16) -> OffsetUtf16 {
589 self.point_utf16_to_offset_utf16_impl(point, false)
590 }
591
592 pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
593 self.point_utf16_to_offset_impl(point.0, true)
594 }
595
596 fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
597 if point >= self.summary().lines_utf16() {
598 return self.summary().len;
599 }
600 let (start, _, item) =
601 self.chunks
602 .find::<Dimensions<PointUtf16, usize>, _>((), &point, Bias::Left);
603 let overshoot = point - start.0;
604 start.1
605 + item.map_or(0, |chunk| {
606 chunk.as_slice().point_utf16_to_offset(overshoot, clip)
607 })
608 }
609
610 fn point_utf16_to_offset_utf16_impl(&self, point: PointUtf16, clip: bool) -> OffsetUtf16 {
611 if point >= self.summary().lines_utf16() {
612 return self.summary().len_utf16;
613 }
614 let mut cursor = self
615 .chunks
616 .cursor::<Dimensions<PointUtf16, OffsetUtf16>>(());
617 cursor.seek(&point, Bias::Left);
618 let overshoot = point - cursor.start().0;
619 cursor.start().1
620 + cursor.item().map_or(OffsetUtf16(0), |chunk| {
621 chunk
622 .as_slice()
623 .offset_to_offset_utf16(chunk.as_slice().point_utf16_to_offset(overshoot, clip))
624 })
625 }
626
627 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
628 if point.0 >= self.summary().lines_utf16() {
629 return self.summary().lines;
630 }
631 let (start, _, item) =
632 self.chunks
633 .find::<Dimensions<PointUtf16, Point>, _>((), &point.0, Bias::Left);
634 let overshoot = Unclipped(point.0 - start.0);
635 start.1
636 + item.map_or(Point::zero(), |chunk| {
637 chunk.as_slice().unclipped_point_utf16_to_point(overshoot)
638 })
639 }
640
641 pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
642 match bias {
643 Bias::Left => self.floor_char_boundary(offset),
644 Bias::Right => self.ceil_char_boundary(offset),
645 }
646 }
647
648 pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
649 let (start, _, item) = self.chunks.find::<OffsetUtf16, _>((), &offset, Bias::Right);
650 if let Some(chunk) = item {
651 let overshoot = offset - start;
652 start + chunk.as_slice().clip_offset_utf16(overshoot, bias)
653 } else {
654 self.summary().len_utf16
655 }
656 }
657
658 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
659 let (start, _, item) = self.chunks.find::<Point, _>((), &point, Bias::Right);
660 if let Some(chunk) = item {
661 let overshoot = point - start;
662 start + chunk.as_slice().clip_point(overshoot, bias)
663 } else {
664 self.summary().lines
665 }
666 }
667
668 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
669 let (start, _, item) = self.chunks.find::<PointUtf16, _>((), &point.0, Bias::Right);
670 if let Some(chunk) = item {
671 let overshoot = Unclipped(point.0 - start);
672 start + chunk.as_slice().clip_point_utf16(overshoot, bias)
673 } else {
674 self.summary().lines_utf16()
675 }
676 }
677
678 pub fn line_len(&self, row: u32) -> u32 {
679 self.clip_point(Point::new(row, u32::MAX), Bias::Left)
680 .column
681 }
682}
683
684impl<'a> From<&'a str> for Rope {
685 fn from(text: &'a str) -> Self {
686 let mut rope = Self::new();
687 rope.push(text);
688 rope
689 }
690}
691
692impl<'a> FromIterator<&'a str> for Rope {
693 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
694 let mut rope = Rope::new();
695 for chunk in iter {
696 rope.push(chunk);
697 }
698 rope
699 }
700}
701
702impl From<String> for Rope {
703 #[inline(always)]
704 fn from(text: String) -> Self {
705 Rope::from(text.as_str())
706 }
707}
708
709impl From<&String> for Rope {
710 #[inline(always)]
711 fn from(text: &String) -> Self {
712 Rope::from(text.as_str())
713 }
714}
715
716/// Display implementation for Rope.
717///
718/// Note: This always uses `\n` as the line separator, regardless of the original
719/// file's line endings. The rope internally normalizes all line breaks to `\n`.
720/// If you need to preserve original line endings (e.g., for LSP communication),
721/// use `to_string_with_line_ending` instead.
722impl fmt::Display for Rope {
723 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
724 for chunk in self.chunks() {
725 write!(f, "{chunk}")?;
726 }
727 Ok(())
728 }
729}
730
731impl fmt::Debug for Rope {
732 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
733 use std::fmt::Write as _;
734
735 write!(f, "\"")?;
736 let mut format_string = String::new();
737 for chunk in self.chunks() {
738 write!(&mut format_string, "{:?}", chunk)?;
739 write!(f, "{}", &format_string[1..format_string.len() - 1])?;
740 format_string.clear();
741 }
742 write!(f, "\"")?;
743 Ok(())
744 }
745}
746
747pub struct Cursor<'a> {
748 rope: &'a Rope,
749 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
750 offset: usize,
751}
752
753impl<'a> Cursor<'a> {
754 pub fn new(rope: &'a Rope, offset: usize) -> Self {
755 let mut chunks = rope.chunks.cursor(());
756 chunks.seek(&offset, Bias::Right);
757 Self {
758 rope,
759 chunks,
760 offset,
761 }
762 }
763
764 pub fn seek_forward(&mut self, end_offset: usize) {
765 debug_assert!(end_offset >= self.offset);
766
767 self.chunks.seek_forward(&end_offset, Bias::Right);
768 self.offset = end_offset;
769 }
770
771 pub fn slice(&mut self, end_offset: usize) -> Rope {
772 debug_assert!(
773 end_offset >= self.offset,
774 "cannot slice backwards from {} to {}",
775 self.offset,
776 end_offset
777 );
778
779 let mut slice = Rope::new();
780 if let Some(start_chunk) = self.chunks.item() {
781 let start_ix = self.offset - self.chunks.start();
782 let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
783 slice.push_chunk(start_chunk.slice(start_ix..end_ix));
784 }
785
786 if end_offset > self.chunks.end() {
787 self.chunks.next();
788 slice.append(Rope {
789 chunks: self.chunks.slice(&end_offset, Bias::Right),
790 });
791 if let Some(end_chunk) = self.chunks.item() {
792 let end_ix = end_offset - self.chunks.start();
793 slice.push_chunk(end_chunk.slice(0..end_ix));
794 }
795 }
796
797 self.offset = end_offset;
798 slice
799 }
800
801 pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
802 debug_assert!(end_offset >= self.offset);
803
804 let mut summary = D::zero(());
805 if let Some(start_chunk) = self.chunks.item() {
806 let start_ix = self.offset - self.chunks.start();
807 let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
808 summary.add_assign(&D::from_chunk(start_chunk.slice(start_ix..end_ix)));
809 }
810
811 if end_offset > self.chunks.end() {
812 self.chunks.next();
813 summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right));
814 if let Some(end_chunk) = self.chunks.item() {
815 let end_ix = end_offset - self.chunks.start();
816 summary.add_assign(&D::from_chunk(end_chunk.slice(0..end_ix)));
817 }
818 }
819
820 self.offset = end_offset;
821 summary
822 }
823
824 pub fn suffix(mut self) -> Rope {
825 self.slice(self.rope.chunks.extent(()))
826 }
827
828 pub fn offset(&self) -> usize {
829 self.offset
830 }
831}
832
833pub struct ChunkBitmaps<'a> {
834 /// A slice of text up to 128 bytes in size
835 pub text: &'a str,
836 /// Bitmap of character locations in text. LSB ordered
837 pub chars: Bitmap,
838 /// Bitmap of tab locations in text. LSB ordered
839 pub tabs: Bitmap,
840}
841
842#[derive(Clone)]
843pub struct Chunks<'a> {
844 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
845 range: Range<usize>,
846 offset: usize,
847 reversed: bool,
848}
849
850impl<'a> Chunks<'a> {
851 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
852 let mut chunks = rope.chunks.cursor(());
853 let offset = if reversed {
854 chunks.seek(&range.end, Bias::Left);
855 range.end
856 } else {
857 chunks.seek(&range.start, Bias::Right);
858 range.start
859 };
860 let chunk_offset = offset - chunks.start();
861 if let Some(chunk) = chunks.item()
862 && !chunk.text.is_char_boundary(chunk_offset)
863 {
864 panic!("byte index {} is not a char boundary", offset);
865 }
866 Self {
867 chunks,
868 range,
869 offset,
870 reversed,
871 }
872 }
873
874 fn offset_is_valid(&self) -> bool {
875 if self.reversed {
876 if self.offset <= self.range.start || self.offset > self.range.end {
877 return false;
878 }
879 } else if self.offset < self.range.start || self.offset >= self.range.end {
880 return false;
881 }
882
883 true
884 }
885
886 pub fn offset(&self) -> usize {
887 self.offset
888 }
889
890 pub fn seek(&mut self, mut offset: usize) {
891 offset = offset.clamp(self.range.start, self.range.end);
892
893 if self.reversed {
894 if offset > self.chunks.end() {
895 self.chunks.seek_forward(&offset, Bias::Left);
896 } else if offset <= *self.chunks.start() {
897 self.chunks.seek(&offset, Bias::Left);
898 }
899 } else {
900 if offset >= self.chunks.end() {
901 self.chunks.seek_forward(&offset, Bias::Right);
902 } else if offset < *self.chunks.start() {
903 self.chunks.seek(&offset, Bias::Right);
904 }
905 };
906
907 self.offset = offset;
908 }
909
910 pub fn set_range(&mut self, range: Range<usize>) {
911 self.range = range.clone();
912 self.seek(range.start);
913 }
914
915 /// Moves this cursor to the start of the next line in the rope.
916 ///
917 /// This method advances the cursor to the beginning of the next line.
918 /// If the cursor is already at the end of the rope, this method does nothing.
919 /// Reversed chunks iterators are not currently supported and will panic.
920 ///
921 /// Returns `true` if the cursor was successfully moved to the next line start,
922 /// or `false` if the cursor was already at the end of the rope.
923 pub fn next_line(&mut self) -> bool {
924 assert!(!self.reversed);
925
926 let mut found = false;
927 if let Some(chunk) = self.peek() {
928 if let Some(newline_ix) = chunk.find('\n') {
929 self.offset += newline_ix + 1;
930 found = self.offset <= self.range.end;
931 } else {
932 self.chunks
933 .search_forward(|summary| summary.text.lines.row > 0);
934 self.offset = *self.chunks.start();
935
936 if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
937 self.offset += newline_ix + 1;
938 found = self.offset <= self.range.end;
939 } else {
940 self.offset = self.chunks.end();
941 }
942 }
943
944 if self.offset == self.chunks.end() {
945 self.next();
946 }
947 }
948
949 if self.offset > self.range.end {
950 self.offset = cmp::min(self.offset, self.range.end);
951 self.chunks.seek(&self.offset, Bias::Right);
952 }
953
954 found
955 }
956
957 /// Move this cursor to the preceding position in the rope that starts a new line.
958 /// Reversed chunks iterators are not currently supported and will panic.
959 ///
960 /// If this cursor is not on the start of a line, it will be moved to the start of
961 /// its current line. Otherwise it will be moved to the start of the previous line.
962 /// It updates the cursor's position and returns true if a previous line was found,
963 /// or false if the cursor was already at the start of the rope.
964 pub fn prev_line(&mut self) -> bool {
965 assert!(!self.reversed);
966
967 let initial_offset = self.offset;
968
969 if self.offset == *self.chunks.start() {
970 self.chunks.prev();
971 }
972
973 if let Some(chunk) = self.chunks.item() {
974 let mut end_ix = self.offset - *self.chunks.start();
975 if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
976 end_ix -= 1;
977 }
978
979 if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
980 self.offset = *self.chunks.start() + newline_ix + 1;
981 if self.offset_is_valid() {
982 return true;
983 }
984 }
985 }
986
987 self.chunks
988 .search_backward(|summary| summary.text.lines.row > 0);
989 self.offset = *self.chunks.start();
990 if let Some(chunk) = self.chunks.item()
991 && let Some(newline_ix) = chunk.text.rfind('\n')
992 {
993 self.offset += newline_ix + 1;
994 if self.offset_is_valid() {
995 if self.offset == self.chunks.end() {
996 self.chunks.next();
997 }
998
999 return true;
1000 }
1001 }
1002
1003 if !self.offset_is_valid() || self.chunks.item().is_none() {
1004 self.offset = self.range.start;
1005 self.chunks.seek(&self.offset, Bias::Right);
1006 }
1007
1008 self.offset < initial_offset && self.offset == 0
1009 }
1010
1011 pub fn peek(&self) -> Option<&'a str> {
1012 if !self.offset_is_valid() {
1013 return None;
1014 }
1015
1016 let chunk = self.chunks.item()?;
1017 let chunk_start = *self.chunks.start();
1018 let slice_range = if self.reversed {
1019 let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1020 let slice_end = self.offset - chunk_start;
1021 slice_start..slice_end
1022 } else {
1023 let slice_start = self.offset - chunk_start;
1024 let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1025 slice_start..slice_end
1026 };
1027
1028 Some(&chunk.text[slice_range])
1029 }
1030
1031 /// Returns bitmaps that represent character positions and tab positions
1032 pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
1033 if !self.offset_is_valid() {
1034 return None;
1035 }
1036
1037 let chunk = self.chunks.item()?;
1038 let chunk_start = *self.chunks.start();
1039 let slice_range = if self.reversed {
1040 let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1041 let slice_end = self.offset - chunk_start;
1042 slice_start..slice_end
1043 } else {
1044 let slice_start = self.offset - chunk_start;
1045 let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1046 slice_start..slice_end
1047 };
1048 let chunk_start_offset = slice_range.start;
1049 let slice_text = &chunk.text[slice_range];
1050
1051 // Shift the tabs to align with our slice window
1052 let shifted_tabs = chunk.tabs() >> chunk_start_offset;
1053 let shifted_chars = chunk.chars() >> chunk_start_offset;
1054
1055 Some(ChunkBitmaps {
1056 text: slice_text,
1057 chars: shifted_chars,
1058 tabs: shifted_tabs,
1059 })
1060 }
1061
1062 pub fn lines(self) -> Lines<'a> {
1063 let reversed = self.reversed;
1064 Lines {
1065 chunks: self,
1066 current_line: String::new(),
1067 done: false,
1068 reversed,
1069 }
1070 }
1071
1072 pub fn equals_str(&self, other: &str) -> bool {
1073 let chunk = self.clone();
1074 if chunk.reversed {
1075 let mut offset = other.len();
1076 for chunk in chunk {
1077 if other[0..offset].ends_with(chunk) {
1078 offset -= chunk.len();
1079 } else {
1080 return false;
1081 }
1082 }
1083 if offset != 0 {
1084 return false;
1085 }
1086 } else {
1087 let mut offset = 0;
1088 for chunk in chunk {
1089 if offset >= other.len() {
1090 return false;
1091 }
1092 if other[offset..].starts_with(chunk) {
1093 offset += chunk.len();
1094 } else {
1095 return false;
1096 }
1097 }
1098 if offset != other.len() {
1099 return false;
1100 }
1101 }
1102
1103 true
1104 }
1105}
1106
1107pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>);
1108
1109impl<'a> Iterator for ChunkWithBitmaps<'a> {
1110 /// text, chars bitmap, tabs bitmap
1111 type Item = ChunkBitmaps<'a>;
1112
1113 fn next(&mut self) -> Option<Self::Item> {
1114 let chunk_bitmaps = self.0.peek_with_bitmaps()?;
1115 if self.0.reversed {
1116 self.0.offset -= chunk_bitmaps.text.len();
1117 if self.0.offset <= *self.0.chunks.start() {
1118 self.0.chunks.prev();
1119 }
1120 } else {
1121 self.0.offset += chunk_bitmaps.text.len();
1122 if self.0.offset >= self.0.chunks.end() {
1123 self.0.chunks.next();
1124 }
1125 }
1126
1127 Some(chunk_bitmaps)
1128 }
1129}
1130
1131impl<'a> Iterator for Chunks<'a> {
1132 type Item = &'a str;
1133
1134 fn next(&mut self) -> Option<Self::Item> {
1135 let chunk = self.peek()?;
1136 if self.reversed {
1137 self.offset -= chunk.len();
1138 if self.offset <= *self.chunks.start() {
1139 self.chunks.prev();
1140 }
1141 } else {
1142 self.offset += chunk.len();
1143 if self.offset >= self.chunks.end() {
1144 self.chunks.next();
1145 }
1146 }
1147
1148 Some(chunk)
1149 }
1150}
1151
1152pub struct Bytes<'a> {
1153 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
1154 range: Range<usize>,
1155 reversed: bool,
1156}
1157
1158impl<'a> Bytes<'a> {
1159 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
1160 let mut chunks = rope.chunks.cursor(());
1161 if reversed {
1162 chunks.seek(&range.end, Bias::Left);
1163 } else {
1164 chunks.seek(&range.start, Bias::Right);
1165 }
1166 Self {
1167 chunks,
1168 range,
1169 reversed,
1170 }
1171 }
1172
1173 pub fn peek(&self) -> Option<&'a [u8]> {
1174 let chunk = self.chunks.item()?;
1175 if self.reversed && self.range.start >= self.chunks.end() {
1176 return None;
1177 }
1178 let chunk_start = *self.chunks.start();
1179 if self.range.end <= chunk_start {
1180 return None;
1181 }
1182 let start = self.range.start.saturating_sub(chunk_start);
1183 let end = self.range.end - chunk_start;
1184 Some(&chunk.text.as_bytes()[start..chunk.text.len().min(end)])
1185 }
1186}
1187
1188impl<'a> Iterator for Bytes<'a> {
1189 type Item = &'a [u8];
1190
1191 fn next(&mut self) -> Option<Self::Item> {
1192 let result = self.peek();
1193 if result.is_some() {
1194 if self.reversed {
1195 self.chunks.prev();
1196 } else {
1197 self.chunks.next();
1198 }
1199 }
1200 result
1201 }
1202}
1203
1204impl io::Read for Bytes<'_> {
1205 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1206 if let Some(chunk) = self.peek() {
1207 let len = cmp::min(buf.len(), chunk.len());
1208 if self.reversed {
1209 buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
1210 buf[..len].reverse();
1211 self.range.end -= len;
1212 } else {
1213 buf[..len].copy_from_slice(&chunk[..len]);
1214 self.range.start += len;
1215 }
1216
1217 if len == chunk.len() {
1218 if self.reversed {
1219 self.chunks.prev();
1220 } else {
1221 self.chunks.next();
1222 }
1223 }
1224 Ok(len)
1225 } else {
1226 Ok(0)
1227 }
1228 }
1229}
1230
1231pub struct Lines<'a> {
1232 chunks: Chunks<'a>,
1233 current_line: String,
1234 done: bool,
1235 reversed: bool,
1236}
1237
1238impl<'a> Lines<'a> {
1239 pub fn next(&mut self) -> Option<&str> {
1240 if self.done {
1241 return None;
1242 }
1243
1244 self.current_line.clear();
1245
1246 while let Some(chunk) = self.chunks.peek() {
1247 let chunk_lines = chunk.split('\n');
1248 if self.reversed {
1249 let mut chunk_lines = chunk_lines.rev().peekable();
1250 if let Some(chunk_line) = chunk_lines.next() {
1251 let done = chunk_lines.peek().is_some();
1252 if done {
1253 self.chunks
1254 .seek(self.chunks.offset() - chunk_line.len() - "\n".len());
1255 if self.current_line.is_empty() {
1256 return Some(chunk_line);
1257 }
1258 }
1259 self.current_line.insert_str(0, chunk_line);
1260 if done {
1261 return Some(&self.current_line);
1262 }
1263 }
1264 } else {
1265 let mut chunk_lines = chunk_lines.peekable();
1266 if let Some(chunk_line) = chunk_lines.next() {
1267 let done = chunk_lines.peek().is_some();
1268 if done {
1269 self.chunks
1270 .seek(self.chunks.offset() + chunk_line.len() + "\n".len());
1271 if self.current_line.is_empty() {
1272 return Some(chunk_line);
1273 }
1274 }
1275 self.current_line.push_str(chunk_line);
1276 if done {
1277 return Some(&self.current_line);
1278 }
1279 }
1280 }
1281
1282 self.chunks.next();
1283 }
1284
1285 self.done = true;
1286 Some(&self.current_line)
1287 }
1288
1289 pub fn seek(&mut self, offset: usize) {
1290 self.chunks.seek(offset);
1291 self.current_line.clear();
1292 self.done = false;
1293 }
1294
1295 pub fn offset(&self) -> usize {
1296 self.chunks.offset()
1297 }
1298}
1299
1300impl sum_tree::Item for Chunk {
1301 type Summary = ChunkSummary;
1302
1303 fn summary(&self, _cx: ()) -> Self::Summary {
1304 ChunkSummary {
1305 text: self.as_slice().text_summary(),
1306 }
1307 }
1308}
1309
1310#[derive(Clone, Debug, Default, Eq, PartialEq)]
1311pub struct ChunkSummary {
1312 text: TextSummary,
1313}
1314
1315impl sum_tree::ContextLessSummary for ChunkSummary {
1316 fn zero() -> Self {
1317 Default::default()
1318 }
1319
1320 fn add_summary(&mut self, summary: &Self) {
1321 self.text += &summary.text;
1322 }
1323}
1324
1325/// Summary of a string of text.
1326#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
1327pub struct TextSummary {
1328 /// Length in bytes.
1329 pub len: usize,
1330 /// Length in UTF-8.
1331 pub chars: usize,
1332 /// Length in UTF-16 code units
1333 pub len_utf16: OffsetUtf16,
1334 /// A point representing the number of lines and the length of the last line.
1335 ///
1336 /// In other words, it marks the point after the last byte in the text, (if
1337 /// EOF was a character, this would be its position).
1338 pub lines: Point,
1339 /// How many `char`s are in the first line
1340 pub first_line_chars: u32,
1341 /// How many `char`s are in the last line
1342 pub last_line_chars: u32,
1343 /// How many UTF-16 code units are in the last line
1344 pub last_line_len_utf16: u32,
1345 /// The row idx of the longest row
1346 pub longest_row: u32,
1347 /// How many `char`s are in the longest row
1348 pub longest_row_chars: u32,
1349}
1350
1351impl TextSummary {
1352 pub fn lines_utf16(&self) -> PointUtf16 {
1353 PointUtf16 {
1354 row: self.lines.row,
1355 column: self.last_line_len_utf16,
1356 }
1357 }
1358
1359 pub fn newline() -> Self {
1360 Self {
1361 len: 1,
1362 chars: 1,
1363 len_utf16: OffsetUtf16(1),
1364 first_line_chars: 0,
1365 last_line_chars: 0,
1366 last_line_len_utf16: 0,
1367 lines: Point::new(1, 0),
1368 longest_row: 0,
1369 longest_row_chars: 0,
1370 }
1371 }
1372
1373 pub fn add_newline(&mut self) {
1374 self.len += 1;
1375 self.len_utf16 += OffsetUtf16(self.len_utf16.0 + 1);
1376 self.last_line_chars = 0;
1377 self.last_line_len_utf16 = 0;
1378 self.lines += Point::new(1, 0);
1379 }
1380}
1381
1382impl<'a> From<&'a str> for TextSummary {
1383 fn from(text: &'a str) -> Self {
1384 let mut len_utf16 = OffsetUtf16(0);
1385 let mut lines = Point::new(0, 0);
1386 let mut first_line_chars = 0;
1387 let mut last_line_chars = 0;
1388 let mut last_line_len_utf16 = 0;
1389 let mut longest_row = 0;
1390 let mut longest_row_chars = 0;
1391 let mut chars = 0;
1392 for c in text.chars() {
1393 chars += 1;
1394 len_utf16.0 += c.len_utf16();
1395
1396 if c == '\n' {
1397 lines += Point::new(1, 0);
1398 last_line_len_utf16 = 0;
1399 last_line_chars = 0;
1400 } else {
1401 lines.column += c.len_utf8() as u32;
1402 last_line_len_utf16 += c.len_utf16() as u32;
1403 last_line_chars += 1;
1404 }
1405
1406 if lines.row == 0 {
1407 first_line_chars = last_line_chars;
1408 }
1409
1410 if last_line_chars > longest_row_chars {
1411 longest_row = lines.row;
1412 longest_row_chars = last_line_chars;
1413 }
1414 }
1415
1416 TextSummary {
1417 len: text.len(),
1418 chars,
1419 len_utf16,
1420 lines,
1421 first_line_chars,
1422 last_line_chars,
1423 last_line_len_utf16,
1424 longest_row,
1425 longest_row_chars,
1426 }
1427 }
1428}
1429
1430impl sum_tree::ContextLessSummary for TextSummary {
1431 fn zero() -> Self {
1432 Default::default()
1433 }
1434
1435 fn add_summary(&mut self, summary: &Self) {
1436 *self += summary;
1437 }
1438}
1439
1440impl ops::Add<Self> for TextSummary {
1441 type Output = Self;
1442
1443 fn add(mut self, rhs: Self) -> Self::Output {
1444 AddAssign::add_assign(&mut self, &rhs);
1445 self
1446 }
1447}
1448
1449impl<'a> ops::AddAssign<&'a Self> for TextSummary {
1450 fn add_assign(&mut self, other: &'a Self) {
1451 let joined_chars = self.last_line_chars + other.first_line_chars;
1452 if joined_chars > self.longest_row_chars {
1453 self.longest_row = self.lines.row;
1454 self.longest_row_chars = joined_chars;
1455 }
1456 if other.longest_row_chars > self.longest_row_chars {
1457 self.longest_row = self.lines.row + other.longest_row;
1458 self.longest_row_chars = other.longest_row_chars;
1459 }
1460
1461 if self.lines.row == 0 {
1462 self.first_line_chars += other.first_line_chars;
1463 }
1464
1465 if other.lines.row == 0 {
1466 self.last_line_chars += other.first_line_chars;
1467 self.last_line_len_utf16 += other.last_line_len_utf16;
1468 } else {
1469 self.last_line_chars = other.last_line_chars;
1470 self.last_line_len_utf16 = other.last_line_len_utf16;
1471 }
1472
1473 self.chars += other.chars;
1474 self.len += other.len;
1475 self.len_utf16 += other.len_utf16;
1476 self.lines += other.lines;
1477 }
1478}
1479
1480impl ops::AddAssign<Self> for TextSummary {
1481 fn add_assign(&mut self, other: Self) {
1482 *self += &other;
1483 }
1484}
1485
1486pub trait TextDimension:
1487 'static + Clone + Copy + Default + for<'a> Dimension<'a, ChunkSummary> + std::fmt::Debug
1488{
1489 fn from_text_summary(summary: &TextSummary) -> Self;
1490 fn from_chunk(chunk: ChunkSlice) -> Self;
1491 fn add_assign(&mut self, other: &Self);
1492}
1493
1494impl<D1: TextDimension, D2: TextDimension> TextDimension for Dimensions<D1, D2, ()> {
1495 fn from_text_summary(summary: &TextSummary) -> Self {
1496 Dimensions(
1497 D1::from_text_summary(summary),
1498 D2::from_text_summary(summary),
1499 (),
1500 )
1501 }
1502
1503 fn from_chunk(chunk: ChunkSlice) -> Self {
1504 Dimensions(D1::from_chunk(chunk), D2::from_chunk(chunk), ())
1505 }
1506
1507 fn add_assign(&mut self, other: &Self) {
1508 self.0.add_assign(&other.0);
1509 self.1.add_assign(&other.1);
1510 }
1511}
1512
1513impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1514 fn zero(_cx: ()) -> Self {
1515 Default::default()
1516 }
1517
1518 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1519 *self += &summary.text;
1520 }
1521}
1522
1523impl TextDimension for TextSummary {
1524 fn from_text_summary(summary: &TextSummary) -> Self {
1525 *summary
1526 }
1527
1528 fn from_chunk(chunk: ChunkSlice) -> Self {
1529 chunk.text_summary()
1530 }
1531
1532 fn add_assign(&mut self, other: &Self) {
1533 *self += other;
1534 }
1535}
1536
1537impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1538 fn zero(_cx: ()) -> Self {
1539 Default::default()
1540 }
1541
1542 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1543 *self += summary.text.len;
1544 }
1545}
1546
1547impl TextDimension for usize {
1548 fn from_text_summary(summary: &TextSummary) -> Self {
1549 summary.len
1550 }
1551
1552 fn from_chunk(chunk: ChunkSlice) -> Self {
1553 chunk.len()
1554 }
1555
1556 fn add_assign(&mut self, other: &Self) {
1557 *self += other;
1558 }
1559}
1560
1561impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1562 fn zero(_cx: ()) -> Self {
1563 Default::default()
1564 }
1565
1566 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1567 *self += summary.text.len_utf16;
1568 }
1569}
1570
1571impl TextDimension for OffsetUtf16 {
1572 fn from_text_summary(summary: &TextSummary) -> Self {
1573 summary.len_utf16
1574 }
1575
1576 fn from_chunk(chunk: ChunkSlice) -> Self {
1577 chunk.len_utf16()
1578 }
1579
1580 fn add_assign(&mut self, other: &Self) {
1581 *self += other;
1582 }
1583}
1584
1585impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1586 fn zero(_cx: ()) -> Self {
1587 Default::default()
1588 }
1589
1590 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1591 *self += summary.text.lines;
1592 }
1593}
1594
1595impl TextDimension for Point {
1596 fn from_text_summary(summary: &TextSummary) -> Self {
1597 summary.lines
1598 }
1599
1600 fn from_chunk(chunk: ChunkSlice) -> Self {
1601 chunk.lines()
1602 }
1603
1604 fn add_assign(&mut self, other: &Self) {
1605 *self += other;
1606 }
1607}
1608
1609impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1610 fn zero(_cx: ()) -> Self {
1611 Default::default()
1612 }
1613
1614 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1615 *self += summary.text.lines_utf16();
1616 }
1617}
1618
1619impl TextDimension for PointUtf16 {
1620 fn from_text_summary(summary: &TextSummary) -> Self {
1621 summary.lines_utf16()
1622 }
1623
1624 fn from_chunk(chunk: ChunkSlice) -> Self {
1625 PointUtf16 {
1626 row: chunk.lines().row,
1627 column: chunk.last_line_len_utf16(),
1628 }
1629 }
1630
1631 fn add_assign(&mut self, other: &Self) {
1632 *self += other;
1633 }
1634}
1635
1636/// A pair of text dimensions in which only the first dimension is used for comparison,
1637/// but both dimensions are updated during addition and subtraction.
1638#[derive(Clone, Copy, Debug)]
1639pub struct DimensionPair<K, V> {
1640 pub key: K,
1641 pub value: Option<V>,
1642}
1643
1644impl<K: Default, V: Default> Default for DimensionPair<K, V> {
1645 fn default() -> Self {
1646 Self {
1647 key: Default::default(),
1648 value: Some(Default::default()),
1649 }
1650 }
1651}
1652
1653impl<K, V> cmp::Ord for DimensionPair<K, V>
1654where
1655 K: cmp::Ord,
1656{
1657 fn cmp(&self, other: &Self) -> cmp::Ordering {
1658 self.key.cmp(&other.key)
1659 }
1660}
1661
1662impl<K, V> cmp::PartialOrd for DimensionPair<K, V>
1663where
1664 K: cmp::PartialOrd,
1665{
1666 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1667 self.key.partial_cmp(&other.key)
1668 }
1669}
1670
1671impl<K, V> cmp::PartialEq for DimensionPair<K, V>
1672where
1673 K: cmp::PartialEq,
1674{
1675 fn eq(&self, other: &Self) -> bool {
1676 self.key.eq(&other.key)
1677 }
1678}
1679
1680impl<K, V> ops::Sub for DimensionPair<K, V>
1681where
1682 K: ops::Sub<K, Output = K>,
1683 V: ops::Sub<V, Output = V>,
1684{
1685 type Output = Self;
1686
1687 fn sub(self, rhs: Self) -> Self::Output {
1688 Self {
1689 key: self.key - rhs.key,
1690 value: self.value.zip(rhs.value).map(|(a, b)| a - b),
1691 }
1692 }
1693}
1694
1695impl<K, V> cmp::Eq for DimensionPair<K, V> where K: cmp::Eq {}
1696
1697impl<'a, K, V> sum_tree::Dimension<'a, ChunkSummary> for DimensionPair<K, V>
1698where
1699 K: sum_tree::Dimension<'a, ChunkSummary>,
1700 V: sum_tree::Dimension<'a, ChunkSummary>,
1701{
1702 fn zero(_cx: ()) -> Self {
1703 Self {
1704 key: K::zero(_cx),
1705 value: Some(V::zero(_cx)),
1706 }
1707 }
1708
1709 fn add_summary(&mut self, summary: &'a ChunkSummary, _cx: ()) {
1710 self.key.add_summary(summary, _cx);
1711 if let Some(value) = &mut self.value {
1712 value.add_summary(summary, _cx);
1713 }
1714 }
1715}
1716
1717impl<K, V> TextDimension for DimensionPair<K, V>
1718where
1719 K: TextDimension,
1720 V: TextDimension,
1721{
1722 fn add_assign(&mut self, other: &Self) {
1723 self.key.add_assign(&other.key);
1724 if let Some(value) = &mut self.value {
1725 if let Some(other_value) = other.value.as_ref() {
1726 value.add_assign(other_value);
1727 } else {
1728 self.value.take();
1729 }
1730 }
1731 }
1732
1733 fn from_chunk(chunk: ChunkSlice) -> Self {
1734 Self {
1735 key: K::from_chunk(chunk),
1736 value: Some(V::from_chunk(chunk)),
1737 }
1738 }
1739
1740 fn from_text_summary(summary: &TextSummary) -> Self {
1741 Self {
1742 key: K::from_text_summary(summary),
1743 value: Some(V::from_text_summary(summary)),
1744 }
1745 }
1746}
1747
1748#[cfg(test)]
1749mod tests {
1750 use super::*;
1751 use Bias::{Left, Right};
1752 use rand::prelude::*;
1753 use std::{cmp::Ordering, env, io::Read};
1754 use util::RandomCharIter;
1755
1756 #[ctor::ctor]
1757 fn init_logger() {
1758 zlog::init_test();
1759 }
1760
1761 #[test]
1762 fn test_all_4_byte_chars() {
1763 let mut rope = Rope::new();
1764 let text = "🏀".repeat(256);
1765 rope.push(&text);
1766 assert_eq!(rope.text(), text);
1767 }
1768
1769 #[test]
1770 fn test_clip() {
1771 let rope = Rope::from("🧘");
1772
1773 assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1774 assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1775 assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1776
1777 assert_eq!(
1778 rope.clip_point(Point::new(0, 1), Bias::Left),
1779 Point::new(0, 0)
1780 );
1781 assert_eq!(
1782 rope.clip_point(Point::new(0, 1), Bias::Right),
1783 Point::new(0, 4)
1784 );
1785 assert_eq!(
1786 rope.clip_point(Point::new(0, 5), Bias::Right),
1787 Point::new(0, 4)
1788 );
1789
1790 assert_eq!(
1791 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1792 PointUtf16::new(0, 0)
1793 );
1794 assert_eq!(
1795 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1796 PointUtf16::new(0, 2)
1797 );
1798 assert_eq!(
1799 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1800 PointUtf16::new(0, 2)
1801 );
1802
1803 assert_eq!(
1804 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1805 OffsetUtf16(0)
1806 );
1807 assert_eq!(
1808 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1809 OffsetUtf16(2)
1810 );
1811 assert_eq!(
1812 rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1813 OffsetUtf16(2)
1814 );
1815 }
1816
1817 #[test]
1818 fn test_prev_next_line() {
1819 let rope = Rope::from("abc\ndef\nghi\njkl");
1820
1821 let mut chunks = rope.chunks();
1822 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1823
1824 assert!(chunks.next_line());
1825 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1826
1827 assert!(chunks.next_line());
1828 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1829
1830 assert!(chunks.next_line());
1831 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1832
1833 assert!(!chunks.next_line());
1834 assert_eq!(chunks.peek(), None);
1835
1836 assert!(chunks.prev_line());
1837 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1838
1839 assert!(chunks.prev_line());
1840 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1841
1842 assert!(chunks.prev_line());
1843 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1844
1845 assert!(chunks.prev_line());
1846 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1847
1848 assert!(!chunks.prev_line());
1849 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1850
1851 // Only return true when the cursor has moved to the start of a line
1852 let mut chunks = rope.chunks_in_range(5..7);
1853 chunks.seek(6);
1854 assert!(!chunks.prev_line());
1855 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
1856
1857 assert!(!chunks.next_line());
1858 assert_eq!(chunks.peek(), None);
1859 }
1860
1861 #[test]
1862 fn test_lines() {
1863 let rope = Rope::from("abc\ndefg\nhi");
1864 let mut lines = rope.chunks().lines();
1865 assert_eq!(lines.next(), Some("abc"));
1866 assert_eq!(lines.next(), Some("defg"));
1867 assert_eq!(lines.next(), Some("hi"));
1868 assert_eq!(lines.next(), None);
1869
1870 let rope = Rope::from("abc\ndefg\nhi\n");
1871 let mut lines = rope.chunks().lines();
1872 assert_eq!(lines.next(), Some("abc"));
1873 assert_eq!(lines.next(), Some("defg"));
1874 assert_eq!(lines.next(), Some("hi"));
1875 assert_eq!(lines.next(), Some(""));
1876 assert_eq!(lines.next(), None);
1877
1878 let rope = Rope::from("abc\ndefg\nhi");
1879 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1880 assert_eq!(lines.next(), Some("hi"));
1881 assert_eq!(lines.next(), Some("defg"));
1882 assert_eq!(lines.next(), Some("abc"));
1883 assert_eq!(lines.next(), None);
1884
1885 let rope = Rope::from("abc\ndefg\nhi\n");
1886 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1887 assert_eq!(lines.next(), Some(""));
1888 assert_eq!(lines.next(), Some("hi"));
1889 assert_eq!(lines.next(), Some("defg"));
1890 assert_eq!(lines.next(), Some("abc"));
1891 assert_eq!(lines.next(), None);
1892
1893 let rope = Rope::from("abc\nlonger line test\nhi");
1894 let mut lines = rope.chunks().lines();
1895 assert_eq!(lines.next(), Some("abc"));
1896 assert_eq!(lines.next(), Some("longer line test"));
1897 assert_eq!(lines.next(), Some("hi"));
1898 assert_eq!(lines.next(), None);
1899
1900 let rope = Rope::from("abc\nlonger line test\nhi");
1901 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1902 assert_eq!(lines.next(), Some("hi"));
1903 assert_eq!(lines.next(), Some("longer line test"));
1904 assert_eq!(lines.next(), Some("abc"));
1905 assert_eq!(lines.next(), None);
1906 }
1907
1908 #[gpui::test(iterations = 100)]
1909 fn test_random_rope(mut rng: StdRng) {
1910 let operations = env::var("OPERATIONS")
1911 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1912 .unwrap_or(10);
1913
1914 let mut expected = String::new();
1915 let mut actual = Rope::new();
1916 for _ in 0..operations {
1917 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1918 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1919 let len = rng.random_range(0..=64);
1920 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1921
1922 let mut new_actual = Rope::new();
1923 let mut cursor = actual.cursor(0);
1924 new_actual.append(cursor.slice(start_ix));
1925 new_actual.push(&new_text);
1926 cursor.seek_forward(end_ix);
1927 new_actual.append(cursor.suffix());
1928 actual = new_actual;
1929
1930 expected.replace_range(start_ix..end_ix, &new_text);
1931
1932 assert_eq!(actual.text(), expected);
1933 log::info!("text: {:?}", expected);
1934
1935 for _ in 0..5 {
1936 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1937 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1938
1939 let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1940 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1941
1942 let mut actual_text = String::new();
1943 actual
1944 .bytes_in_range(start_ix..end_ix)
1945 .read_to_string(&mut actual_text)
1946 .unwrap();
1947 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1948
1949 assert_eq!(
1950 actual
1951 .reversed_chunks_in_range(start_ix..end_ix)
1952 .collect::<Vec<&str>>()
1953 .into_iter()
1954 .rev()
1955 .collect::<String>(),
1956 &expected[start_ix..end_ix]
1957 );
1958
1959 let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
1960 .match_indices('\n')
1961 .map(|(index, _)| start_ix + index + 1)
1962 .collect();
1963
1964 let mut chunks = actual.chunks_in_range(start_ix..end_ix);
1965
1966 let mut actual_line_starts = Vec::new();
1967 while chunks.next_line() {
1968 actual_line_starts.push(chunks.offset());
1969 }
1970 assert_eq!(
1971 actual_line_starts,
1972 expected_line_starts,
1973 "actual line starts != expected line starts when using next_line() for {:?} ({:?})",
1974 &expected[start_ix..end_ix],
1975 start_ix..end_ix
1976 );
1977
1978 if start_ix < end_ix
1979 && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
1980 {
1981 expected_line_starts.insert(0, start_ix);
1982 }
1983 // Remove the last index if it starts at the end of the range.
1984 if expected_line_starts.last() == Some(&end_ix) {
1985 expected_line_starts.pop();
1986 }
1987
1988 let mut actual_line_starts = Vec::new();
1989 while chunks.prev_line() {
1990 actual_line_starts.push(chunks.offset());
1991 }
1992 actual_line_starts.reverse();
1993 assert_eq!(
1994 actual_line_starts,
1995 expected_line_starts,
1996 "actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
1997 &expected[start_ix..end_ix],
1998 start_ix..end_ix
1999 );
2000
2001 // Check that next_line/prev_line work correctly from random positions
2002 let mut offset = rng.random_range(start_ix..=end_ix);
2003 while !expected.is_char_boundary(offset) {
2004 offset -= 1;
2005 }
2006 chunks.seek(offset);
2007
2008 for _ in 0..5 {
2009 if rng.random() {
2010 let expected_next_line_start = expected[offset..end_ix]
2011 .find('\n')
2012 .map(|newline_ix| offset + newline_ix + 1);
2013
2014 let moved = chunks.next_line();
2015 assert_eq!(
2016 moved,
2017 expected_next_line_start.is_some(),
2018 "unexpected result from next_line after seeking to {} in range {:?} ({:?})",
2019 offset,
2020 start_ix..end_ix,
2021 &expected[start_ix..end_ix]
2022 );
2023 if let Some(expected_next_line_start) = expected_next_line_start {
2024 assert_eq!(
2025 chunks.offset(),
2026 expected_next_line_start,
2027 "invalid position after seeking to {} in range {:?} ({:?})",
2028 offset,
2029 start_ix..end_ix,
2030 &expected[start_ix..end_ix]
2031 );
2032 } else {
2033 assert_eq!(
2034 chunks.offset(),
2035 end_ix,
2036 "invalid position after seeking to {} in range {:?} ({:?})",
2037 offset,
2038 start_ix..end_ix,
2039 &expected[start_ix..end_ix]
2040 );
2041 }
2042 } else {
2043 let search_end = if offset > 0 && expected.as_bytes()[offset - 1] == b'\n' {
2044 offset - 1
2045 } else {
2046 offset
2047 };
2048
2049 let expected_prev_line_start = expected[..search_end]
2050 .rfind('\n')
2051 .and_then(|newline_ix| {
2052 let line_start_ix = newline_ix + 1;
2053 if line_start_ix >= start_ix {
2054 Some(line_start_ix)
2055 } else {
2056 None
2057 }
2058 })
2059 .or({
2060 if offset > 0 && start_ix == 0 {
2061 Some(0)
2062 } else {
2063 None
2064 }
2065 });
2066
2067 let moved = chunks.prev_line();
2068 assert_eq!(
2069 moved,
2070 expected_prev_line_start.is_some(),
2071 "unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
2072 offset,
2073 start_ix..end_ix,
2074 &expected[start_ix..end_ix]
2075 );
2076 if let Some(expected_prev_line_start) = expected_prev_line_start {
2077 assert_eq!(
2078 chunks.offset(),
2079 expected_prev_line_start,
2080 "invalid position after seeking to {} in range {:?} ({:?})",
2081 offset,
2082 start_ix..end_ix,
2083 &expected[start_ix..end_ix]
2084 );
2085 } else {
2086 assert_eq!(
2087 chunks.offset(),
2088 start_ix,
2089 "invalid position after seeking to {} in range {:?} ({:?})",
2090 offset,
2091 start_ix..end_ix,
2092 &expected[start_ix..end_ix]
2093 );
2094 }
2095 }
2096
2097 assert!((start_ix..=end_ix).contains(&chunks.offset()));
2098 if rng.random() {
2099 offset = rng.random_range(start_ix..=end_ix);
2100 while !expected.is_char_boundary(offset) {
2101 offset -= 1;
2102 }
2103 chunks.seek(offset);
2104 } else {
2105 chunks.next();
2106 offset = chunks.offset();
2107 assert!((start_ix..=end_ix).contains(&chunks.offset()));
2108 }
2109 }
2110 }
2111
2112 let mut offset_utf16 = OffsetUtf16(0);
2113 let mut point = Point::new(0, 0);
2114 let mut point_utf16 = PointUtf16::new(0, 0);
2115 for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
2116 assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
2117 assert_eq!(
2118 actual.offset_to_point_utf16(ix),
2119 point_utf16,
2120 "offset_to_point_utf16({})",
2121 ix
2122 );
2123 assert_eq!(
2124 actual.point_to_offset(point),
2125 ix,
2126 "point_to_offset({:?})",
2127 point
2128 );
2129 assert_eq!(
2130 actual.point_utf16_to_offset(point_utf16),
2131 ix,
2132 "point_utf16_to_offset({:?})",
2133 point_utf16
2134 );
2135 assert_eq!(
2136 actual.offset_to_offset_utf16(ix),
2137 offset_utf16,
2138 "offset_to_offset_utf16({:?})",
2139 ix
2140 );
2141 assert_eq!(
2142 actual.offset_utf16_to_offset(offset_utf16),
2143 ix,
2144 "offset_utf16_to_offset({:?})",
2145 offset_utf16
2146 );
2147 if ch == '\n' {
2148 point += Point::new(1, 0);
2149 point_utf16 += PointUtf16::new(1, 0);
2150 } else {
2151 point.column += ch.len_utf8() as u32;
2152 point_utf16.column += ch.len_utf16() as u32;
2153 }
2154 offset_utf16.0 += ch.len_utf16();
2155 }
2156
2157 let mut offset_utf16 = OffsetUtf16(0);
2158 let mut point_utf16 = Unclipped(PointUtf16::zero());
2159 for unit in expected.encode_utf16() {
2160 let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
2161 let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
2162 assert!(right_offset >= left_offset);
2163 // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
2164 actual.offset_utf16_to_offset(left_offset);
2165 actual.offset_utf16_to_offset(right_offset);
2166
2167 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
2168 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
2169 assert!(right_point >= left_point);
2170 // Ensure translating valid UTF-16 points to offsets doesn't panic.
2171 actual.point_utf16_to_offset(left_point);
2172 actual.point_utf16_to_offset(right_point);
2173
2174 offset_utf16.0 += 1;
2175 if unit == b'\n' as u16 {
2176 point_utf16.0 += PointUtf16::new(1, 0);
2177 } else {
2178 point_utf16.0 += PointUtf16::new(0, 1);
2179 }
2180 }
2181
2182 for _ in 0..5 {
2183 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
2184 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
2185 assert_eq!(
2186 actual.cursor(start_ix).summary::<TextSummary>(end_ix),
2187 TextSummary::from(&expected[start_ix..end_ix])
2188 );
2189 }
2190
2191 let mut expected_longest_rows = Vec::new();
2192 let mut longest_line_len = -1_isize;
2193 for (row, line) in expected.split('\n').enumerate() {
2194 let row = row as u32;
2195 assert_eq!(
2196 actual.line_len(row),
2197 line.len() as u32,
2198 "invalid line len for row {}",
2199 row
2200 );
2201
2202 let line_char_count = line.chars().count() as isize;
2203 match line_char_count.cmp(&longest_line_len) {
2204 Ordering::Less => {}
2205 Ordering::Equal => expected_longest_rows.push(row),
2206 Ordering::Greater => {
2207 longest_line_len = line_char_count;
2208 expected_longest_rows.clear();
2209 expected_longest_rows.push(row);
2210 }
2211 }
2212 }
2213
2214 let longest_row = actual.summary().longest_row;
2215 assert!(
2216 expected_longest_rows.contains(&longest_row),
2217 "incorrect longest row {}. expected {:?} with length {}",
2218 longest_row,
2219 expected_longest_rows,
2220 longest_line_len,
2221 );
2222 }
2223 }
2224
2225 #[test]
2226 fn test_chunks_equals_str() {
2227 let text = "This is a multi-chunk\n& multi-line test string!";
2228 let rope = Rope::from(text);
2229 for start in 0..text.len() {
2230 for end in start..text.len() {
2231 let range = start..end;
2232 let correct_substring = &text[start..end];
2233
2234 // Test that correct range returns true
2235 assert!(
2236 rope.chunks_in_range(range.clone())
2237 .equals_str(correct_substring)
2238 );
2239 assert!(
2240 rope.reversed_chunks_in_range(range.clone())
2241 .equals_str(correct_substring)
2242 );
2243
2244 // Test that all other ranges return false (unless they happen to match)
2245 for other_start in 0..text.len() {
2246 for other_end in other_start..text.len() {
2247 if other_start == start && other_end == end {
2248 continue;
2249 }
2250 let other_substring = &text[other_start..other_end];
2251
2252 // Only assert false if the substrings are actually different
2253 if other_substring == correct_substring {
2254 continue;
2255 }
2256 assert!(
2257 !rope
2258 .chunks_in_range(range.clone())
2259 .equals_str(other_substring)
2260 );
2261 assert!(
2262 !rope
2263 .reversed_chunks_in_range(range.clone())
2264 .equals_str(other_substring)
2265 );
2266 }
2267 }
2268 }
2269 }
2270
2271 let rope = Rope::from("");
2272 assert!(rope.chunks_in_range(0..0).equals_str(""));
2273 assert!(rope.reversed_chunks_in_range(0..0).equals_str(""));
2274 assert!(!rope.chunks_in_range(0..0).equals_str("foo"));
2275 assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo"));
2276 }
2277
2278 #[test]
2279 fn test_is_char_boundary() {
2280 let fixture = "地";
2281 let rope = Rope::from("地");
2282 for b in 0..=fixture.len() {
2283 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2284 }
2285 let fixture = "";
2286 let rope = Rope::from("");
2287 for b in 0..=fixture.len() {
2288 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2289 }
2290 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2291 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2292 for b in 0..=fixture.len() {
2293 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2294 }
2295 }
2296
2297 #[test]
2298 fn test_floor_char_boundary() {
2299 // polyfill of str::floor_char_boundary
2300 fn floor_char_boundary(str: &str, index: usize) -> usize {
2301 if index >= str.len() {
2302 str.len()
2303 } else {
2304 let lower_bound = index.saturating_sub(3);
2305 let new_index = str.as_bytes()[lower_bound..=index]
2306 .iter()
2307 .rposition(|b| (*b as i8) >= -0x40);
2308
2309 lower_bound + new_index.unwrap()
2310 }
2311 }
2312
2313 let fixture = "地";
2314 let rope = Rope::from("地");
2315 for b in 0..=fixture.len() {
2316 assert_eq!(
2317 rope.floor_char_boundary(b),
2318 floor_char_boundary(&fixture, b)
2319 );
2320 }
2321
2322 let fixture = "";
2323 let rope = Rope::from("");
2324 for b in 0..=fixture.len() {
2325 assert_eq!(
2326 rope.floor_char_boundary(b),
2327 floor_char_boundary(&fixture, b)
2328 );
2329 }
2330
2331 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2332 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2333 for b in 0..=fixture.len() {
2334 assert_eq!(
2335 rope.floor_char_boundary(b),
2336 floor_char_boundary(&fixture, b)
2337 );
2338 }
2339 }
2340
2341 #[test]
2342 fn test_ceil_char_boundary() {
2343 // polyfill of str::ceil_char_boundary
2344 fn ceil_char_boundary(str: &str, index: usize) -> usize {
2345 if index > str.len() {
2346 str.len()
2347 } else {
2348 let upper_bound = Ord::min(index + 4, str.len());
2349 str.as_bytes()[index..upper_bound]
2350 .iter()
2351 .position(|b| (*b as i8) >= -0x40)
2352 .map_or(upper_bound, |pos| pos + index)
2353 }
2354 }
2355
2356 let fixture = "地";
2357 let rope = Rope::from("地");
2358 for b in 0..=fixture.len() {
2359 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2360 }
2361
2362 let fixture = "";
2363 let rope = Rope::from("");
2364 for b in 0..=fixture.len() {
2365 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2366 }
2367
2368 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2369 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2370 for b in 0..=fixture.len() {
2371 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2372 }
2373 }
2374
2375 #[test]
2376 fn test_to_string_with_line_ending() {
2377 // Test Unix line endings (no conversion)
2378 let rope = Rope::from("line1\nline2\nline3");
2379 assert_eq!(
2380 rope.to_string_with_line_ending(LineEnding::Unix),
2381 "line1\nline2\nline3"
2382 );
2383
2384 // Test Windows line endings
2385 assert_eq!(
2386 rope.to_string_with_line_ending(LineEnding::Windows),
2387 "line1\r\nline2\r\nline3"
2388 );
2389
2390 // Test empty rope
2391 let empty_rope = Rope::from("");
2392 assert_eq!(
2393 empty_rope.to_string_with_line_ending(LineEnding::Windows),
2394 ""
2395 );
2396
2397 // Test single line (no newlines)
2398 let single_line = Rope::from("single line");
2399 assert_eq!(
2400 single_line.to_string_with_line_ending(LineEnding::Windows),
2401 "single line"
2402 );
2403
2404 // Test rope ending with newline
2405 let ending_newline = Rope::from("line1\nline2\n");
2406 assert_eq!(
2407 ending_newline.to_string_with_line_ending(LineEnding::Windows),
2408 "line1\r\nline2\r\n"
2409 );
2410
2411 // Test large rope with multiple chunks
2412 let mut large_rope = Rope::new();
2413 for i in 0..100 {
2414 large_rope.push(&format!("line{}\n", i));
2415 }
2416 let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
2417 assert!(result.contains("\r\n"));
2418 assert!(!result.contains("\n\n"));
2419 assert_eq!(result.matches("\r\n").count(), 100);
2420 }
2421
2422 fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
2423 while !text.is_char_boundary(offset) {
2424 match bias {
2425 Bias::Left => offset -= 1,
2426 Bias::Right => offset += 1,
2427 }
2428 }
2429 offset
2430 }
2431
2432 impl Rope {
2433 fn text(&self) -> String {
2434 let mut text = String::new();
2435 for chunk in self.chunks.cursor::<()>(()) {
2436 text.push_str(&chunk.text);
2437 }
2438 text
2439 }
2440 }
2441}