1mod chunk;
2mod offset_utf16;
3mod point;
4mod point_utf16;
5mod unclipped;
6
7use arrayvec::ArrayVec;
8use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
9use regex::Regex;
10use std::{
11 borrow::Cow,
12 cmp, fmt, io, mem,
13 ops::{self, AddAssign, Range},
14 str,
15 sync::{Arc, LazyLock},
16};
17use sum_tree::{Bias, Dimension, Dimensions, SumTree};
18
19pub use chunk::{Chunk, ChunkSlice};
20pub use offset_utf16::OffsetUtf16;
21pub use point::Point;
22pub use point_utf16::PointUtf16;
23pub use unclipped::Unclipped;
24
25use crate::chunk::Bitmap;
26
27static LINE_SEPARATORS_REGEX: LazyLock<Regex> =
28 LazyLock::new(|| Regex::new(r"\r\n|\r").expect("Failed to create LINE_SEPARATORS_REGEX"));
29
30#[derive(Clone, Copy, Debug, PartialEq)]
31pub enum LineEnding {
32 Unix,
33 Windows,
34}
35
36impl Default for LineEnding {
37 fn default() -> Self {
38 #[cfg(unix)]
39 return Self::Unix;
40
41 #[cfg(not(unix))]
42 return Self::Windows;
43 }
44}
45
46impl LineEnding {
47 pub fn as_str(&self) -> &'static str {
48 match self {
49 LineEnding::Unix => "\n",
50 LineEnding::Windows => "\r\n",
51 }
52 }
53
54 pub fn label(&self) -> &'static str {
55 match self {
56 LineEnding::Unix => "LF",
57 LineEnding::Windows => "CRLF",
58 }
59 }
60
61 pub fn detect(text: &str) -> Self {
62 let mut max_ix = cmp::min(text.len(), 1000);
63 while !text.is_char_boundary(max_ix) {
64 max_ix -= 1;
65 }
66
67 if let Some(ix) = text[..max_ix].find(['\n']) {
68 if ix > 0 && text.as_bytes()[ix - 1] == b'\r' {
69 Self::Windows
70 } else {
71 Self::Unix
72 }
73 } else {
74 Self::default()
75 }
76 }
77
78 pub fn normalize(text: &mut String) {
79 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(text, "\n") {
80 *text = replaced;
81 }
82 }
83
84 pub fn normalize_arc(text: Arc<str>) -> Arc<str> {
85 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
86 replaced.into()
87 } else {
88 text
89 }
90 }
91
92 pub fn normalize_cow(text: Cow<str>) -> Cow<str> {
93 if let Cow::Owned(replaced) = LINE_SEPARATORS_REGEX.replace_all(&text, "\n") {
94 replaced.into()
95 } else {
96 text
97 }
98 }
99
100 /// Converts text chunks into a [`String`] using the current line ending.
101 pub fn into_string(&self, chunks: Chunks<'_>) -> String {
102 match self {
103 LineEnding::Unix => chunks.collect(),
104 LineEnding::Windows => {
105 let line_ending = self.as_str();
106 let mut result = String::new();
107 for chunk in chunks {
108 result.push_str(&chunk.replace('\n', line_ending));
109 }
110 result
111 }
112 }
113 }
114}
115
116#[derive(Clone, Default)]
117pub struct Rope {
118 chunks: SumTree<Chunk>,
119}
120
121impl Rope {
122 pub fn new() -> Self {
123 Self::default()
124 }
125
126 /// Checks that `index`-th byte is the first byte in a UTF-8 code point
127 /// sequence or the end of the string.
128 ///
129 /// The start and end of the string (when `index == self.len()`) are
130 /// considered to be boundaries.
131 ///
132 /// Returns `false` if `index` is greater than `self.len()`.
133 pub fn is_char_boundary(&self, offset: usize) -> bool {
134 if self.chunks.is_empty() {
135 return offset == 0;
136 }
137 let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
138 let chunk_offset = offset - start;
139 item.map(|chunk| chunk.is_char_boundary(chunk_offset))
140 .unwrap_or(false)
141 }
142
143 #[track_caller]
144 #[inline(always)]
145 pub fn assert_char_boundary(&self, offset: usize) {
146 if self.chunks.is_empty() && offset == 0 {
147 return;
148 }
149 let (start, _, item) = self.chunks.find::<usize, _>((), &offset, Bias::Left);
150 match item {
151 Some(chunk) => {
152 let chunk_offset = offset - start;
153 chunk.assert_char_boundary(chunk_offset);
154 }
155 None => {
156 panic!(
157 "byte index {} is out of bounds of rope (length: {})",
158 offset,
159 self.len()
160 );
161 }
162 }
163 }
164
165 pub fn floor_char_boundary(&self, index: usize) -> usize {
166 if index >= self.len() {
167 self.len()
168 } else {
169 #[inline]
170 pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
171 // This is bit magic equivalent to: b < 128 || b >= 192
172 (u8 as i8) >= -0x40
173 }
174
175 let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
176 let chunk_offset = index - start;
177 let lower_idx = item.map(|chunk| {
178 let lower_bound = chunk_offset.saturating_sub(3);
179 chunk
180 .text
181 .as_bytes()
182 .get(lower_bound..=chunk_offset)
183 .map(|it| {
184 let new_idx = it
185 .iter()
186 .rposition(|&b| is_utf8_char_boundary(b))
187 .unwrap_or(0);
188 lower_bound + new_idx
189 })
190 .unwrap_or(chunk.text.len())
191 });
192 lower_idx.map_or_else(|| self.len(), |idx| start + idx)
193 }
194 }
195
196 pub fn ceil_char_boundary(&self, index: usize) -> usize {
197 if index > self.len() {
198 self.len()
199 } else {
200 #[inline]
201 pub(crate) const fn is_utf8_char_boundary(u8: u8) -> bool {
202 // This is bit magic equivalent to: b < 128 || b >= 192
203 (u8 as i8) >= -0x40
204 }
205
206 let (start, _, item) = self.chunks.find::<usize, _>((), &index, Bias::Left);
207 let chunk_offset = index - start;
208 let upper_idx = item.map(|chunk| {
209 let upper_bound = Ord::min(chunk_offset + 4, chunk.text.len());
210 chunk.text.as_bytes()[chunk_offset..upper_bound]
211 .iter()
212 .position(|&b| is_utf8_char_boundary(b))
213 .map_or(upper_bound, |pos| pos + chunk_offset)
214 });
215
216 upper_idx.map_or_else(|| self.len(), |idx| start + idx)
217 }
218 }
219
220 pub fn append(&mut self, rope: Rope) {
221 if let Some(chunk) = rope.chunks.first()
222 && (self
223 .chunks
224 .last()
225 .is_some_and(|c| c.text.len() < chunk::MIN_BASE)
226 || chunk.text.len() < chunk::MIN_BASE)
227 {
228 self.push_chunk(chunk.as_slice());
229
230 let mut chunks = rope.chunks.cursor::<()>(());
231 chunks.next();
232 chunks.next();
233 self.chunks.append(chunks.suffix(), ());
234 } else {
235 self.chunks.append(rope.chunks, ());
236 }
237 self.check_invariants();
238 }
239
240 pub fn replace(&mut self, range: Range<usize>, text: &str) {
241 let mut new_rope = Rope::new();
242 let mut cursor = self.cursor(0);
243 new_rope.append(cursor.slice(range.start));
244 cursor.seek_forward(range.end);
245 new_rope.push(text);
246 new_rope.append(cursor.suffix());
247 *self = new_rope;
248 }
249
250 pub fn slice(&self, range: Range<usize>) -> Rope {
251 let mut cursor = self.cursor(0);
252 cursor.seek_forward(range.start);
253 cursor.slice(range.end)
254 }
255
256 pub fn slice_rows(&self, range: Range<u32>) -> Rope {
257 // This would be more efficient with a forward advance after the first, but it's fine.
258 let start = self.point_to_offset(Point::new(range.start, 0));
259 let end = self.point_to_offset(Point::new(range.end, 0));
260 self.slice(start..end)
261 }
262
263 pub fn push(&mut self, mut text: &str) {
264 self.chunks.update_last(
265 |last_chunk| {
266 let split_ix = if last_chunk.text.len() + text.len() <= chunk::MAX_BASE {
267 text.len()
268 } else {
269 let mut split_ix = cmp::min(
270 chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
271 text.len(),
272 );
273 while !text.is_char_boundary(split_ix) {
274 split_ix += 1;
275 }
276 split_ix
277 };
278
279 let (suffix, remainder) = text.split_at(split_ix);
280 last_chunk.push_str(suffix);
281 text = remainder;
282 },
283 (),
284 );
285
286 #[cfg(not(test))]
287 const NUM_CHUNKS: usize = 16;
288 #[cfg(test)]
289 const NUM_CHUNKS: usize = 4;
290
291 // We accommodate for NUM_CHUNKS chunks of size MAX_BASE
292 // but given the chunk boundary can land within a character
293 // we need to accommodate for the worst case where every chunk gets cut short by up to 4 bytes
294 if text.len() > NUM_CHUNKS * chunk::MAX_BASE - NUM_CHUNKS * 4 {
295 return self.push_large(text);
296 }
297 // 16 is enough as otherwise we will hit the branch above
298 let mut new_chunks = ArrayVec::<_, NUM_CHUNKS>::new();
299
300 while !text.is_empty() {
301 let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
302 while !text.is_char_boundary(split_ix) {
303 split_ix -= 1;
304 }
305 let (chunk, remainder) = text.split_at(split_ix);
306 new_chunks.push(chunk);
307 text = remainder;
308 }
309 self.chunks
310 .extend(new_chunks.into_iter().map(Chunk::new), ());
311
312 self.check_invariants();
313 }
314
315 /// A copy of `push` specialized for working with large quantities of text.
316 fn push_large(&mut self, mut text: &str) {
317 // To avoid frequent reallocs when loading large swaths of file contents,
318 // we estimate worst-case `new_chunks` capacity;
319 // Chunk is a fixed-capacity buffer. If a character falls on
320 // chunk boundary, we push it off to the following chunk (thus leaving a small bit of capacity unfilled in current chunk).
321 // Worst-case chunk count when loading a file is then a case where every chunk ends up with that unused capacity.
322 // Since we're working with UTF-8, each character is at most 4 bytes wide. It follows then that the worst case is where
323 // a chunk ends with 3 bytes of a 4-byte character. These 3 bytes end up being stored in the following chunk, thus wasting
324 // 3 bytes of storage in current chunk.
325 // For example, a 1024-byte string can occupy between 32 (full ASCII, 1024/32) and 36 (full 4-byte UTF-8, 1024 / 29 rounded up) chunks.
326 const MIN_CHUNK_SIZE: usize = chunk::MAX_BASE - 3;
327
328 // We also round up the capacity up by one, for a good measure; we *really* don't want to realloc here, as we assume that the # of characters
329 // we're working with there is large.
330 let capacity = text.len().div_ceil(MIN_CHUNK_SIZE);
331 let mut new_chunks = Vec::with_capacity(capacity);
332
333 while !text.is_empty() {
334 let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
335 while !text.is_char_boundary(split_ix) {
336 split_ix -= 1;
337 }
338 let (chunk, remainder) = text.split_at(split_ix);
339 new_chunks.push(chunk);
340 text = remainder;
341 }
342
343 #[cfg(test)]
344 const PARALLEL_THRESHOLD: usize = 4;
345 #[cfg(not(test))]
346 const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
347
348 if new_chunks.len() >= PARALLEL_THRESHOLD {
349 self.chunks
350 .par_extend(new_chunks.into_par_iter().map(Chunk::new), ());
351 } else {
352 self.chunks
353 .extend(new_chunks.into_iter().map(Chunk::new), ());
354 }
355
356 self.check_invariants();
357 }
358
359 fn push_chunk(&mut self, mut chunk: ChunkSlice) {
360 self.chunks.update_last(
361 |last_chunk| {
362 let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
363 chunk.len()
364 } else {
365 let mut split_ix = cmp::min(
366 chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
367 chunk.len(),
368 );
369 while !chunk.is_char_boundary(split_ix) {
370 split_ix += 1;
371 }
372 split_ix
373 };
374
375 let (suffix, remainder) = chunk.split_at(split_ix);
376 last_chunk.append(suffix);
377 chunk = remainder;
378 },
379 (),
380 );
381
382 if !chunk.is_empty() {
383 self.chunks.push(chunk.into(), ());
384 }
385 }
386
387 pub fn push_front(&mut self, text: &str) {
388 let suffix = mem::replace(self, Rope::from(text));
389 self.append(suffix);
390 }
391
392 fn check_invariants(&self) {
393 #[cfg(test)]
394 {
395 // Ensure all chunks except maybe the last one are not underflowing.
396 // Allow some wiggle room for multibyte characters at chunk boundaries.
397 let mut chunks = self.chunks.cursor::<()>(()).peekable();
398 while let Some(chunk) = chunks.next() {
399 if chunks.peek().is_some() {
400 assert!(chunk.text.len() + 3 >= chunk::MIN_BASE);
401 }
402 }
403 }
404 }
405
406 pub fn summary(&self) -> TextSummary {
407 self.chunks.summary().text
408 }
409
410 pub fn len(&self) -> usize {
411 self.chunks.extent(())
412 }
413
414 pub fn is_empty(&self) -> bool {
415 self.len() == 0
416 }
417
418 pub fn max_point(&self) -> Point {
419 self.chunks.extent(())
420 }
421
422 pub fn max_point_utf16(&self) -> PointUtf16 {
423 self.chunks.extent(())
424 }
425
426 pub fn cursor(&self, offset: usize) -> Cursor<'_> {
427 Cursor::new(self, offset)
428 }
429
430 pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
431 self.chars_at(0)
432 }
433
434 pub fn chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
435 self.chunks_in_range(start..self.len()).flat_map(str::chars)
436 }
437
438 pub fn reversed_chars_at(&self, start: usize) -> impl Iterator<Item = char> + '_ {
439 self.reversed_chunks_in_range(0..start)
440 .flat_map(|chunk| chunk.chars().rev())
441 }
442
443 pub fn bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
444 Bytes::new(self, range, false)
445 }
446
447 pub fn reversed_bytes_in_range(&self, range: Range<usize>) -> Bytes<'_> {
448 Bytes::new(self, range, true)
449 }
450
451 pub fn chunks(&self) -> Chunks<'_> {
452 self.chunks_in_range(0..self.len())
453 }
454
455 pub fn chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
456 Chunks::new(self, range, false)
457 }
458
459 pub fn reversed_chunks_in_range(&self, range: Range<usize>) -> Chunks<'_> {
460 Chunks::new(self, range, true)
461 }
462
463 /// Formats the rope's text with the specified line ending string.
464 /// This replaces all `\n` characters with the provided line ending.
465 ///
466 /// The rope internally stores all line breaks as `\n` (see `Display` impl).
467 /// Use this method to convert to different line endings for file operations,
468 /// LSP communication, or other scenarios requiring specific line ending formats.
469 pub fn to_string_with_line_ending(&self, line_ending: LineEnding) -> String {
470 line_ending.into_string(self.chunks())
471 }
472
473 pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
474 if offset >= self.summary().len {
475 return self.summary().len_utf16;
476 }
477 let (start, _, item) =
478 self.chunks
479 .find::<Dimensions<usize, OffsetUtf16>, _>((), &offset, Bias::Left);
480 let overshoot = offset - start.0;
481 start.1
482 + item.map_or(Default::default(), |chunk| {
483 chunk.as_slice().offset_to_offset_utf16(overshoot)
484 })
485 }
486
487 pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
488 if offset >= self.summary().len_utf16 {
489 return self.summary().len;
490 }
491 let (start, _, item) =
492 self.chunks
493 .find::<Dimensions<OffsetUtf16, usize>, _>((), &offset, Bias::Left);
494 let overshoot = offset - start.0;
495 start.1
496 + item.map_or(Default::default(), |chunk| {
497 chunk.as_slice().offset_utf16_to_offset(overshoot)
498 })
499 }
500
501 pub fn offset_to_point(&self, offset: usize) -> Point {
502 if offset >= self.summary().len {
503 return self.summary().lines;
504 }
505 let (start, _, item) =
506 self.chunks
507 .find::<Dimensions<usize, Point>, _>((), &offset, Bias::Left);
508 let overshoot = offset - start.0;
509 start.1
510 + item.map_or(Point::zero(), |chunk| {
511 chunk.as_slice().offset_to_point(overshoot)
512 })
513 }
514
515 pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
516 if offset >= self.summary().len {
517 return self.summary().lines_utf16();
518 }
519 let (start, _, item) =
520 self.chunks
521 .find::<Dimensions<usize, PointUtf16>, _>((), &offset, Bias::Left);
522 let overshoot = offset - start.0;
523 start.1
524 + item.map_or(PointUtf16::zero(), |chunk| {
525 chunk.as_slice().offset_to_point_utf16(overshoot)
526 })
527 }
528
529 pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
530 if point >= self.summary().lines {
531 return self.summary().lines_utf16();
532 }
533 let (start, _, item) =
534 self.chunks
535 .find::<Dimensions<Point, PointUtf16>, _>((), &point, Bias::Left);
536 let overshoot = point - start.0;
537 start.1
538 + item.map_or(PointUtf16::zero(), |chunk| {
539 chunk.as_slice().point_to_point_utf16(overshoot)
540 })
541 }
542
543 pub fn point_utf16_to_point(&self, point: PointUtf16) -> Point {
544 if point >= self.summary().lines_utf16() {
545 return self.summary().lines;
546 }
547 let mut cursor = self.chunks.cursor::<Dimensions<PointUtf16, Point>>(());
548 cursor.seek(&point, Bias::Left);
549 let overshoot = point - cursor.start().0;
550 cursor.start().1
551 + cursor.item().map_or(Point::zero(), |chunk| {
552 chunk
553 .as_slice()
554 .offset_to_point(chunk.as_slice().point_utf16_to_offset(overshoot, false))
555 })
556 }
557
558 pub fn point_to_offset(&self, point: Point) -> usize {
559 if point >= self.summary().lines {
560 return self.summary().len;
561 }
562 let (start, _, item) =
563 self.chunks
564 .find::<Dimensions<Point, usize>, _>((), &point, Bias::Left);
565 let overshoot = point - start.0;
566 start.1 + item.map_or(0, |chunk| chunk.as_slice().point_to_offset(overshoot))
567 }
568
569 pub fn point_to_offset_utf16(&self, point: Point) -> OffsetUtf16 {
570 if point >= self.summary().lines {
571 return self.summary().len_utf16;
572 }
573 let mut cursor = self.chunks.cursor::<Dimensions<Point, OffsetUtf16>>(());
574 cursor.seek(&point, Bias::Left);
575 let overshoot = point - cursor.start().0;
576 cursor.start().1
577 + cursor.item().map_or(OffsetUtf16(0), |chunk| {
578 chunk.as_slice().point_to_offset_utf16(overshoot)
579 })
580 }
581
582 pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
583 self.point_utf16_to_offset_impl(point, false)
584 }
585
586 pub fn point_utf16_to_offset_utf16(&self, point: PointUtf16) -> OffsetUtf16 {
587 self.point_utf16_to_offset_utf16_impl(point, false)
588 }
589
590 pub fn unclipped_point_utf16_to_offset(&self, point: Unclipped<PointUtf16>) -> usize {
591 self.point_utf16_to_offset_impl(point.0, true)
592 }
593
594 fn point_utf16_to_offset_impl(&self, point: PointUtf16, clip: bool) -> usize {
595 if point >= self.summary().lines_utf16() {
596 return self.summary().len;
597 }
598 let (start, _, item) =
599 self.chunks
600 .find::<Dimensions<PointUtf16, usize>, _>((), &point, Bias::Left);
601 let overshoot = point - start.0;
602 start.1
603 + item.map_or(0, |chunk| {
604 chunk.as_slice().point_utf16_to_offset(overshoot, clip)
605 })
606 }
607
608 fn point_utf16_to_offset_utf16_impl(&self, point: PointUtf16, clip: bool) -> OffsetUtf16 {
609 if point >= self.summary().lines_utf16() {
610 return self.summary().len_utf16;
611 }
612 let mut cursor = self
613 .chunks
614 .cursor::<Dimensions<PointUtf16, OffsetUtf16>>(());
615 cursor.seek(&point, Bias::Left);
616 let overshoot = point - cursor.start().0;
617 cursor.start().1
618 + cursor.item().map_or(OffsetUtf16(0), |chunk| {
619 chunk
620 .as_slice()
621 .offset_to_offset_utf16(chunk.as_slice().point_utf16_to_offset(overshoot, clip))
622 })
623 }
624
625 pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
626 if point.0 >= self.summary().lines_utf16() {
627 return self.summary().lines;
628 }
629 let (start, _, item) =
630 self.chunks
631 .find::<Dimensions<PointUtf16, Point>, _>((), &point.0, Bias::Left);
632 let overshoot = Unclipped(point.0 - start.0);
633 start.1
634 + item.map_or(Point::zero(), |chunk| {
635 chunk.as_slice().unclipped_point_utf16_to_point(overshoot)
636 })
637 }
638
639 pub fn clip_offset(&self, offset: usize, bias: Bias) -> usize {
640 match bias {
641 Bias::Left => self.floor_char_boundary(offset),
642 Bias::Right => self.ceil_char_boundary(offset),
643 }
644 }
645
646 pub fn clip_offset_utf16(&self, offset: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
647 let (start, _, item) = self.chunks.find::<OffsetUtf16, _>((), &offset, Bias::Right);
648 if let Some(chunk) = item {
649 let overshoot = offset - start;
650 start + chunk.as_slice().clip_offset_utf16(overshoot, bias)
651 } else {
652 self.summary().len_utf16
653 }
654 }
655
656 pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
657 let (start, _, item) = self.chunks.find::<Point, _>((), &point, Bias::Right);
658 if let Some(chunk) = item {
659 let overshoot = point - start;
660 start + chunk.as_slice().clip_point(overshoot, bias)
661 } else {
662 self.summary().lines
663 }
664 }
665
666 pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
667 let (start, _, item) = self.chunks.find::<PointUtf16, _>((), &point.0, Bias::Right);
668 if let Some(chunk) = item {
669 let overshoot = Unclipped(point.0 - start);
670 start + chunk.as_slice().clip_point_utf16(overshoot, bias)
671 } else {
672 self.summary().lines_utf16()
673 }
674 }
675
676 pub fn line_len(&self, row: u32) -> u32 {
677 self.clip_point(Point::new(row, u32::MAX), Bias::Left)
678 .column
679 }
680}
681
682impl<'a> From<&'a str> for Rope {
683 fn from(text: &'a str) -> Self {
684 let mut rope = Self::new();
685 rope.push(text);
686 rope
687 }
688}
689
690impl<'a> FromIterator<&'a str> for Rope {
691 fn from_iter<T: IntoIterator<Item = &'a str>>(iter: T) -> Self {
692 let mut rope = Rope::new();
693 for chunk in iter {
694 rope.push(chunk);
695 }
696 rope
697 }
698}
699
700impl From<String> for Rope {
701 #[inline(always)]
702 fn from(text: String) -> Self {
703 Rope::from(text.as_str())
704 }
705}
706
707impl From<&String> for Rope {
708 #[inline(always)]
709 fn from(text: &String) -> Self {
710 Rope::from(text.as_str())
711 }
712}
713
714/// Display implementation for Rope.
715///
716/// Note: This always uses `\n` as the line separator, regardless of the original
717/// file's line endings. The rope internally normalizes all line breaks to `\n`.
718/// If you need to preserve original line endings (e.g., for LSP communication),
719/// use `to_string_with_line_ending` instead.
720impl fmt::Display for Rope {
721 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
722 for chunk in self.chunks() {
723 write!(f, "{chunk}")?;
724 }
725 Ok(())
726 }
727}
728
729impl fmt::Debug for Rope {
730 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
731 use std::fmt::Write as _;
732
733 write!(f, "\"")?;
734 let mut format_string = String::new();
735 for chunk in self.chunks() {
736 write!(&mut format_string, "{:?}", chunk)?;
737 write!(f, "{}", &format_string[1..format_string.len() - 1])?;
738 format_string.clear();
739 }
740 write!(f, "\"")?;
741 Ok(())
742 }
743}
744
745pub struct Cursor<'a> {
746 rope: &'a Rope,
747 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
748 offset: usize,
749}
750
751impl<'a> Cursor<'a> {
752 pub fn new(rope: &'a Rope, offset: usize) -> Self {
753 let mut chunks = rope.chunks.cursor(());
754 chunks.seek(&offset, Bias::Right);
755 Self {
756 rope,
757 chunks,
758 offset,
759 }
760 }
761
762 pub fn seek_forward(&mut self, end_offset: usize) {
763 debug_assert!(end_offset >= self.offset);
764
765 self.chunks.seek_forward(&end_offset, Bias::Right);
766 self.offset = end_offset;
767 }
768
769 pub fn slice(&mut self, end_offset: usize) -> Rope {
770 debug_assert!(
771 end_offset >= self.offset,
772 "cannot slice backwards from {} to {}",
773 self.offset,
774 end_offset
775 );
776
777 let mut slice = Rope::new();
778 if let Some(start_chunk) = self.chunks.item() {
779 let start_ix = self.offset - self.chunks.start();
780 let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
781 slice.push_chunk(start_chunk.slice(start_ix..end_ix));
782 }
783
784 if end_offset > self.chunks.end() {
785 self.chunks.next();
786 slice.append(Rope {
787 chunks: self.chunks.slice(&end_offset, Bias::Right),
788 });
789 if let Some(end_chunk) = self.chunks.item() {
790 let end_ix = end_offset - self.chunks.start();
791 slice.push_chunk(end_chunk.slice(0..end_ix));
792 }
793 }
794
795 self.offset = end_offset;
796 slice
797 }
798
799 pub fn summary<D: TextDimension>(&mut self, end_offset: usize) -> D {
800 debug_assert!(end_offset >= self.offset);
801
802 let mut summary = D::zero(());
803 if let Some(start_chunk) = self.chunks.item() {
804 let start_ix = self.offset - self.chunks.start();
805 let end_ix = cmp::min(end_offset, self.chunks.end()) - self.chunks.start();
806 summary.add_assign(&D::from_chunk(start_chunk.slice(start_ix..end_ix)));
807 }
808
809 if end_offset > self.chunks.end() {
810 self.chunks.next();
811 summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right));
812 if let Some(end_chunk) = self.chunks.item() {
813 let end_ix = end_offset - self.chunks.start();
814 summary.add_assign(&D::from_chunk(end_chunk.slice(0..end_ix)));
815 }
816 }
817
818 self.offset = end_offset;
819 summary
820 }
821
822 pub fn suffix(mut self) -> Rope {
823 self.slice(self.rope.chunks.extent(()))
824 }
825
826 pub fn offset(&self) -> usize {
827 self.offset
828 }
829}
830
831pub struct ChunkBitmaps<'a> {
832 /// A slice of text up to 128 bytes in size
833 pub text: &'a str,
834 /// Bitmap of character locations in text. LSB ordered
835 pub chars: Bitmap,
836 /// Bitmap of tab locations in text. LSB ordered
837 pub tabs: Bitmap,
838}
839
840#[derive(Clone)]
841pub struct Chunks<'a> {
842 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
843 range: Range<usize>,
844 offset: usize,
845 reversed: bool,
846}
847
848impl<'a> Chunks<'a> {
849 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
850 let mut chunks = rope.chunks.cursor(());
851 let offset = if reversed {
852 chunks.seek(&range.end, Bias::Left);
853 range.end
854 } else {
855 chunks.seek(&range.start, Bias::Right);
856 range.start
857 };
858 let chunk_offset = offset - chunks.start();
859 if let Some(chunk) = chunks.item()
860 && !chunk.text.is_char_boundary(chunk_offset)
861 {
862 panic!("byte index {} is not a char boundary", offset);
863 }
864 Self {
865 chunks,
866 range,
867 offset,
868 reversed,
869 }
870 }
871
872 fn offset_is_valid(&self) -> bool {
873 if self.reversed {
874 if self.offset <= self.range.start || self.offset > self.range.end {
875 return false;
876 }
877 } else if self.offset < self.range.start || self.offset >= self.range.end {
878 return false;
879 }
880
881 true
882 }
883
884 pub fn offset(&self) -> usize {
885 self.offset
886 }
887
888 pub fn seek(&mut self, mut offset: usize) {
889 offset = offset.clamp(self.range.start, self.range.end);
890
891 if self.reversed {
892 if offset > self.chunks.end() {
893 self.chunks.seek_forward(&offset, Bias::Left);
894 } else if offset <= *self.chunks.start() {
895 self.chunks.seek(&offset, Bias::Left);
896 }
897 } else {
898 if offset >= self.chunks.end() {
899 self.chunks.seek_forward(&offset, Bias::Right);
900 } else if offset < *self.chunks.start() {
901 self.chunks.seek(&offset, Bias::Right);
902 }
903 };
904
905 self.offset = offset;
906 }
907
908 pub fn set_range(&mut self, range: Range<usize>) {
909 self.range = range.clone();
910 self.seek(range.start);
911 }
912
913 /// Moves this cursor to the start of the next line in the rope.
914 ///
915 /// This method advances the cursor to the beginning of the next line.
916 /// If the cursor is already at the end of the rope, this method does nothing.
917 /// Reversed chunks iterators are not currently supported and will panic.
918 ///
919 /// Returns `true` if the cursor was successfully moved to the next line start,
920 /// or `false` if the cursor was already at the end of the rope.
921 pub fn next_line(&mut self) -> bool {
922 assert!(!self.reversed);
923
924 let mut found = false;
925 if let Some(chunk) = self.peek() {
926 if let Some(newline_ix) = chunk.find('\n') {
927 self.offset += newline_ix + 1;
928 found = self.offset <= self.range.end;
929 } else {
930 self.chunks
931 .search_forward(|summary| summary.text.lines.row > 0);
932 self.offset = *self.chunks.start();
933
934 if let Some(newline_ix) = self.peek().and_then(|chunk| chunk.find('\n')) {
935 self.offset += newline_ix + 1;
936 found = self.offset <= self.range.end;
937 } else {
938 self.offset = self.chunks.end();
939 }
940 }
941
942 if self.offset == self.chunks.end() {
943 self.next();
944 }
945 }
946
947 if self.offset > self.range.end {
948 self.offset = cmp::min(self.offset, self.range.end);
949 self.chunks.seek(&self.offset, Bias::Right);
950 }
951
952 found
953 }
954
955 /// Move this cursor to the preceding position in the rope that starts a new line.
956 /// Reversed chunks iterators are not currently supported and will panic.
957 ///
958 /// If this cursor is not on the start of a line, it will be moved to the start of
959 /// its current line. Otherwise it will be moved to the start of the previous line.
960 /// It updates the cursor's position and returns true if a previous line was found,
961 /// or false if the cursor was already at the start of the rope.
962 pub fn prev_line(&mut self) -> bool {
963 assert!(!self.reversed);
964
965 let initial_offset = self.offset;
966
967 if self.offset == *self.chunks.start() {
968 self.chunks.prev();
969 }
970
971 if let Some(chunk) = self.chunks.item() {
972 let mut end_ix = self.offset - *self.chunks.start();
973 if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
974 end_ix -= 1;
975 }
976
977 if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
978 self.offset = *self.chunks.start() + newline_ix + 1;
979 if self.offset_is_valid() {
980 return true;
981 }
982 }
983 }
984
985 self.chunks
986 .search_backward(|summary| summary.text.lines.row > 0);
987 self.offset = *self.chunks.start();
988 if let Some(chunk) = self.chunks.item()
989 && let Some(newline_ix) = chunk.text.rfind('\n')
990 {
991 self.offset += newline_ix + 1;
992 if self.offset_is_valid() {
993 if self.offset == self.chunks.end() {
994 self.chunks.next();
995 }
996
997 return true;
998 }
999 }
1000
1001 if !self.offset_is_valid() || self.chunks.item().is_none() {
1002 self.offset = self.range.start;
1003 self.chunks.seek(&self.offset, Bias::Right);
1004 }
1005
1006 self.offset < initial_offset && self.offset == 0
1007 }
1008
1009 pub fn peek(&self) -> Option<&'a str> {
1010 if !self.offset_is_valid() {
1011 return None;
1012 }
1013
1014 let chunk = self.chunks.item()?;
1015 let chunk_start = *self.chunks.start();
1016 let slice_range = if self.reversed {
1017 let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1018 let slice_end = self.offset - chunk_start;
1019 slice_start..slice_end
1020 } else {
1021 let slice_start = self.offset - chunk_start;
1022 let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1023 slice_start..slice_end
1024 };
1025
1026 Some(&chunk.text[slice_range])
1027 }
1028
1029 /// Returns bitmaps that represent character positions and tab positions
1030 pub fn peek_with_bitmaps(&self) -> Option<ChunkBitmaps<'a>> {
1031 if !self.offset_is_valid() {
1032 return None;
1033 }
1034
1035 let chunk = self.chunks.item()?;
1036 let chunk_start = *self.chunks.start();
1037 let slice_range = if self.reversed {
1038 let slice_start = cmp::max(chunk_start, self.range.start) - chunk_start;
1039 let slice_end = self.offset - chunk_start;
1040 slice_start..slice_end
1041 } else {
1042 let slice_start = self.offset - chunk_start;
1043 let slice_end = cmp::min(self.chunks.end(), self.range.end) - chunk_start;
1044 slice_start..slice_end
1045 };
1046 let chunk_start_offset = slice_range.start;
1047 let slice_text = &chunk.text[slice_range];
1048
1049 // Shift the tabs to align with our slice window
1050 let shifted_tabs = chunk.tabs() >> chunk_start_offset;
1051 let shifted_chars = chunk.chars() >> chunk_start_offset;
1052
1053 Some(ChunkBitmaps {
1054 text: slice_text,
1055 chars: shifted_chars,
1056 tabs: shifted_tabs,
1057 })
1058 }
1059
1060 pub fn lines(self) -> Lines<'a> {
1061 let reversed = self.reversed;
1062 Lines {
1063 chunks: self,
1064 current_line: String::new(),
1065 done: false,
1066 reversed,
1067 }
1068 }
1069
1070 pub fn equals_str(&self, other: &str) -> bool {
1071 let chunk = self.clone();
1072 if chunk.reversed {
1073 let mut offset = other.len();
1074 for chunk in chunk {
1075 if other[0..offset].ends_with(chunk) {
1076 offset -= chunk.len();
1077 } else {
1078 return false;
1079 }
1080 }
1081 if offset != 0 {
1082 return false;
1083 }
1084 } else {
1085 let mut offset = 0;
1086 for chunk in chunk {
1087 if offset >= other.len() {
1088 return false;
1089 }
1090 if other[offset..].starts_with(chunk) {
1091 offset += chunk.len();
1092 } else {
1093 return false;
1094 }
1095 }
1096 if offset != other.len() {
1097 return false;
1098 }
1099 }
1100
1101 true
1102 }
1103}
1104
1105pub struct ChunkWithBitmaps<'a>(pub Chunks<'a>);
1106
1107impl<'a> Iterator for ChunkWithBitmaps<'a> {
1108 /// text, chars bitmap, tabs bitmap
1109 type Item = ChunkBitmaps<'a>;
1110
1111 fn next(&mut self) -> Option<Self::Item> {
1112 let chunk_bitmaps = self.0.peek_with_bitmaps()?;
1113 if self.0.reversed {
1114 self.0.offset -= chunk_bitmaps.text.len();
1115 if self.0.offset <= *self.0.chunks.start() {
1116 self.0.chunks.prev();
1117 }
1118 } else {
1119 self.0.offset += chunk_bitmaps.text.len();
1120 if self.0.offset >= self.0.chunks.end() {
1121 self.0.chunks.next();
1122 }
1123 }
1124
1125 Some(chunk_bitmaps)
1126 }
1127}
1128
1129impl<'a> Iterator for Chunks<'a> {
1130 type Item = &'a str;
1131
1132 fn next(&mut self) -> Option<Self::Item> {
1133 let chunk = self.peek()?;
1134 if self.reversed {
1135 self.offset -= chunk.len();
1136 if self.offset <= *self.chunks.start() {
1137 self.chunks.prev();
1138 }
1139 } else {
1140 self.offset += chunk.len();
1141 if self.offset >= self.chunks.end() {
1142 self.chunks.next();
1143 }
1144 }
1145
1146 Some(chunk)
1147 }
1148}
1149
1150pub struct Bytes<'a> {
1151 chunks: sum_tree::Cursor<'a, 'static, Chunk, usize>,
1152 range: Range<usize>,
1153 reversed: bool,
1154}
1155
1156impl<'a> Bytes<'a> {
1157 pub fn new(rope: &'a Rope, range: Range<usize>, reversed: bool) -> Self {
1158 let mut chunks = rope.chunks.cursor(());
1159 if reversed {
1160 chunks.seek(&range.end, Bias::Left);
1161 } else {
1162 chunks.seek(&range.start, Bias::Right);
1163 }
1164 Self {
1165 chunks,
1166 range,
1167 reversed,
1168 }
1169 }
1170
1171 pub fn peek(&self) -> Option<&'a [u8]> {
1172 let chunk = self.chunks.item()?;
1173 if self.reversed && self.range.start >= self.chunks.end() {
1174 return None;
1175 }
1176 let chunk_start = *self.chunks.start();
1177 if self.range.end <= chunk_start {
1178 return None;
1179 }
1180 let start = self.range.start.saturating_sub(chunk_start);
1181 let end = self.range.end - chunk_start;
1182 Some(&chunk.text.as_bytes()[start..chunk.text.len().min(end)])
1183 }
1184}
1185
1186impl<'a> Iterator for Bytes<'a> {
1187 type Item = &'a [u8];
1188
1189 fn next(&mut self) -> Option<Self::Item> {
1190 let result = self.peek();
1191 if result.is_some() {
1192 if self.reversed {
1193 self.chunks.prev();
1194 } else {
1195 self.chunks.next();
1196 }
1197 }
1198 result
1199 }
1200}
1201
1202impl io::Read for Bytes<'_> {
1203 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1204 if let Some(chunk) = self.peek() {
1205 let len = cmp::min(buf.len(), chunk.len());
1206 if self.reversed {
1207 buf[..len].copy_from_slice(&chunk[chunk.len() - len..]);
1208 buf[..len].reverse();
1209 self.range.end -= len;
1210 } else {
1211 buf[..len].copy_from_slice(&chunk[..len]);
1212 self.range.start += len;
1213 }
1214
1215 if len == chunk.len() {
1216 if self.reversed {
1217 self.chunks.prev();
1218 } else {
1219 self.chunks.next();
1220 }
1221 }
1222 Ok(len)
1223 } else {
1224 Ok(0)
1225 }
1226 }
1227}
1228
1229pub struct Lines<'a> {
1230 chunks: Chunks<'a>,
1231 current_line: String,
1232 done: bool,
1233 reversed: bool,
1234}
1235
1236impl<'a> Lines<'a> {
1237 pub fn next(&mut self) -> Option<&str> {
1238 if self.done {
1239 return None;
1240 }
1241
1242 self.current_line.clear();
1243
1244 while let Some(chunk) = self.chunks.peek() {
1245 let chunk_lines = chunk.split('\n');
1246 if self.reversed {
1247 let mut chunk_lines = chunk_lines.rev().peekable();
1248 if let Some(chunk_line) = chunk_lines.next() {
1249 let done = chunk_lines.peek().is_some();
1250 if done {
1251 self.chunks
1252 .seek(self.chunks.offset() - chunk_line.len() - "\n".len());
1253 if self.current_line.is_empty() {
1254 return Some(chunk_line);
1255 }
1256 }
1257 self.current_line.insert_str(0, chunk_line);
1258 if done {
1259 return Some(&self.current_line);
1260 }
1261 }
1262 } else {
1263 let mut chunk_lines = chunk_lines.peekable();
1264 if let Some(chunk_line) = chunk_lines.next() {
1265 let done = chunk_lines.peek().is_some();
1266 if done {
1267 self.chunks
1268 .seek(self.chunks.offset() + chunk_line.len() + "\n".len());
1269 if self.current_line.is_empty() {
1270 return Some(chunk_line);
1271 }
1272 }
1273 self.current_line.push_str(chunk_line);
1274 if done {
1275 return Some(&self.current_line);
1276 }
1277 }
1278 }
1279
1280 self.chunks.next();
1281 }
1282
1283 self.done = true;
1284 Some(&self.current_line)
1285 }
1286
1287 pub fn seek(&mut self, offset: usize) {
1288 self.chunks.seek(offset);
1289 self.current_line.clear();
1290 self.done = false;
1291 }
1292
1293 pub fn offset(&self) -> usize {
1294 self.chunks.offset()
1295 }
1296}
1297
1298impl sum_tree::Item for Chunk {
1299 type Summary = ChunkSummary;
1300
1301 fn summary(&self, _cx: ()) -> Self::Summary {
1302 ChunkSummary {
1303 text: self.as_slice().text_summary(),
1304 }
1305 }
1306}
1307
1308#[derive(Clone, Debug, Default, Eq, PartialEq)]
1309pub struct ChunkSummary {
1310 text: TextSummary,
1311}
1312
1313impl sum_tree::ContextLessSummary for ChunkSummary {
1314 fn zero() -> Self {
1315 Default::default()
1316 }
1317
1318 fn add_summary(&mut self, summary: &Self) {
1319 self.text += &summary.text;
1320 }
1321}
1322
1323/// Summary of a string of text.
1324#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
1325pub struct TextSummary {
1326 /// Length in bytes.
1327 pub len: usize,
1328 /// Length in UTF-8.
1329 pub chars: usize,
1330 /// Length in UTF-16 code units
1331 pub len_utf16: OffsetUtf16,
1332 /// A point representing the number of lines and the length of the last line.
1333 ///
1334 /// In other words, it marks the point after the last byte in the text, (if
1335 /// EOF was a character, this would be its position).
1336 pub lines: Point,
1337 /// How many `char`s are in the first line
1338 pub first_line_chars: u32,
1339 /// How many `char`s are in the last line
1340 pub last_line_chars: u32,
1341 /// How many UTF-16 code units are in the last line
1342 pub last_line_len_utf16: u32,
1343 /// The row idx of the longest row
1344 pub longest_row: u32,
1345 /// How many `char`s are in the longest row
1346 pub longest_row_chars: u32,
1347}
1348
1349impl TextSummary {
1350 pub fn lines_utf16(&self) -> PointUtf16 {
1351 PointUtf16 {
1352 row: self.lines.row,
1353 column: self.last_line_len_utf16,
1354 }
1355 }
1356
1357 pub fn newline() -> Self {
1358 Self {
1359 len: 1,
1360 chars: 1,
1361 len_utf16: OffsetUtf16(1),
1362 first_line_chars: 0,
1363 last_line_chars: 0,
1364 last_line_len_utf16: 0,
1365 lines: Point::new(1, 0),
1366 longest_row: 0,
1367 longest_row_chars: 0,
1368 }
1369 }
1370
1371 pub fn add_newline(&mut self) {
1372 self.len += 1;
1373 self.len_utf16 += OffsetUtf16(self.len_utf16.0 + 1);
1374 self.last_line_chars = 0;
1375 self.last_line_len_utf16 = 0;
1376 self.lines += Point::new(1, 0);
1377 }
1378}
1379
1380impl<'a> From<&'a str> for TextSummary {
1381 fn from(text: &'a str) -> Self {
1382 let mut len_utf16 = OffsetUtf16(0);
1383 let mut lines = Point::new(0, 0);
1384 let mut first_line_chars = 0;
1385 let mut last_line_chars = 0;
1386 let mut last_line_len_utf16 = 0;
1387 let mut longest_row = 0;
1388 let mut longest_row_chars = 0;
1389 let mut chars = 0;
1390 for c in text.chars() {
1391 chars += 1;
1392 len_utf16.0 += c.len_utf16();
1393
1394 if c == '\n' {
1395 lines += Point::new(1, 0);
1396 last_line_len_utf16 = 0;
1397 last_line_chars = 0;
1398 } else {
1399 lines.column += c.len_utf8() as u32;
1400 last_line_len_utf16 += c.len_utf16() as u32;
1401 last_line_chars += 1;
1402 }
1403
1404 if lines.row == 0 {
1405 first_line_chars = last_line_chars;
1406 }
1407
1408 if last_line_chars > longest_row_chars {
1409 longest_row = lines.row;
1410 longest_row_chars = last_line_chars;
1411 }
1412 }
1413
1414 TextSummary {
1415 len: text.len(),
1416 chars,
1417 len_utf16,
1418 lines,
1419 first_line_chars,
1420 last_line_chars,
1421 last_line_len_utf16,
1422 longest_row,
1423 longest_row_chars,
1424 }
1425 }
1426}
1427
1428impl sum_tree::ContextLessSummary for TextSummary {
1429 fn zero() -> Self {
1430 Default::default()
1431 }
1432
1433 fn add_summary(&mut self, summary: &Self) {
1434 *self += summary;
1435 }
1436}
1437
1438impl ops::Add<Self> for TextSummary {
1439 type Output = Self;
1440
1441 fn add(mut self, rhs: Self) -> Self::Output {
1442 AddAssign::add_assign(&mut self, &rhs);
1443 self
1444 }
1445}
1446
1447impl<'a> ops::AddAssign<&'a Self> for TextSummary {
1448 fn add_assign(&mut self, other: &'a Self) {
1449 let joined_chars = self.last_line_chars + other.first_line_chars;
1450 if joined_chars > self.longest_row_chars {
1451 self.longest_row = self.lines.row;
1452 self.longest_row_chars = joined_chars;
1453 }
1454 if other.longest_row_chars > self.longest_row_chars {
1455 self.longest_row = self.lines.row + other.longest_row;
1456 self.longest_row_chars = other.longest_row_chars;
1457 }
1458
1459 if self.lines.row == 0 {
1460 self.first_line_chars += other.first_line_chars;
1461 }
1462
1463 if other.lines.row == 0 {
1464 self.last_line_chars += other.first_line_chars;
1465 self.last_line_len_utf16 += other.last_line_len_utf16;
1466 } else {
1467 self.last_line_chars = other.last_line_chars;
1468 self.last_line_len_utf16 = other.last_line_len_utf16;
1469 }
1470
1471 self.chars += other.chars;
1472 self.len += other.len;
1473 self.len_utf16 += other.len_utf16;
1474 self.lines += other.lines;
1475 }
1476}
1477
1478impl ops::AddAssign<Self> for TextSummary {
1479 fn add_assign(&mut self, other: Self) {
1480 *self += &other;
1481 }
1482}
1483
1484pub trait TextDimension:
1485 'static + Clone + Copy + Default + for<'a> Dimension<'a, ChunkSummary> + std::fmt::Debug
1486{
1487 fn from_text_summary(summary: &TextSummary) -> Self;
1488 fn from_chunk(chunk: ChunkSlice) -> Self;
1489 fn add_assign(&mut self, other: &Self);
1490}
1491
1492impl<D1: TextDimension, D2: TextDimension> TextDimension for Dimensions<D1, D2, ()> {
1493 fn from_text_summary(summary: &TextSummary) -> Self {
1494 Dimensions(
1495 D1::from_text_summary(summary),
1496 D2::from_text_summary(summary),
1497 (),
1498 )
1499 }
1500
1501 fn from_chunk(chunk: ChunkSlice) -> Self {
1502 Dimensions(D1::from_chunk(chunk), D2::from_chunk(chunk), ())
1503 }
1504
1505 fn add_assign(&mut self, other: &Self) {
1506 self.0.add_assign(&other.0);
1507 self.1.add_assign(&other.1);
1508 }
1509}
1510
1511impl<'a> sum_tree::Dimension<'a, ChunkSummary> for TextSummary {
1512 fn zero(_cx: ()) -> Self {
1513 Default::default()
1514 }
1515
1516 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1517 *self += &summary.text;
1518 }
1519}
1520
1521impl TextDimension for TextSummary {
1522 fn from_text_summary(summary: &TextSummary) -> Self {
1523 *summary
1524 }
1525
1526 fn from_chunk(chunk: ChunkSlice) -> Self {
1527 chunk.text_summary()
1528 }
1529
1530 fn add_assign(&mut self, other: &Self) {
1531 *self += other;
1532 }
1533}
1534
1535impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
1536 fn zero(_cx: ()) -> Self {
1537 Default::default()
1538 }
1539
1540 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1541 *self += summary.text.len;
1542 }
1543}
1544
1545impl TextDimension for usize {
1546 fn from_text_summary(summary: &TextSummary) -> Self {
1547 summary.len
1548 }
1549
1550 fn from_chunk(chunk: ChunkSlice) -> Self {
1551 chunk.len()
1552 }
1553
1554 fn add_assign(&mut self, other: &Self) {
1555 *self += other;
1556 }
1557}
1558
1559impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
1560 fn zero(_cx: ()) -> Self {
1561 Default::default()
1562 }
1563
1564 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1565 *self += summary.text.len_utf16;
1566 }
1567}
1568
1569impl TextDimension for OffsetUtf16 {
1570 fn from_text_summary(summary: &TextSummary) -> Self {
1571 summary.len_utf16
1572 }
1573
1574 fn from_chunk(chunk: ChunkSlice) -> Self {
1575 chunk.len_utf16()
1576 }
1577
1578 fn add_assign(&mut self, other: &Self) {
1579 *self += other;
1580 }
1581}
1582
1583impl<'a> sum_tree::Dimension<'a, ChunkSummary> for Point {
1584 fn zero(_cx: ()) -> Self {
1585 Default::default()
1586 }
1587
1588 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1589 *self += summary.text.lines;
1590 }
1591}
1592
1593impl TextDimension for Point {
1594 fn from_text_summary(summary: &TextSummary) -> Self {
1595 summary.lines
1596 }
1597
1598 fn from_chunk(chunk: ChunkSlice) -> Self {
1599 chunk.lines()
1600 }
1601
1602 fn add_assign(&mut self, other: &Self) {
1603 *self += other;
1604 }
1605}
1606
1607impl<'a> sum_tree::Dimension<'a, ChunkSummary> for PointUtf16 {
1608 fn zero(_cx: ()) -> Self {
1609 Default::default()
1610 }
1611
1612 fn add_summary(&mut self, summary: &'a ChunkSummary, _: ()) {
1613 *self += summary.text.lines_utf16();
1614 }
1615}
1616
1617impl TextDimension for PointUtf16 {
1618 fn from_text_summary(summary: &TextSummary) -> Self {
1619 summary.lines_utf16()
1620 }
1621
1622 fn from_chunk(chunk: ChunkSlice) -> Self {
1623 PointUtf16 {
1624 row: chunk.lines().row,
1625 column: chunk.last_line_len_utf16(),
1626 }
1627 }
1628
1629 fn add_assign(&mut self, other: &Self) {
1630 *self += other;
1631 }
1632}
1633
1634/// A pair of text dimensions in which only the first dimension is used for comparison,
1635/// but both dimensions are updated during addition and subtraction.
1636#[derive(Clone, Copy, Debug)]
1637pub struct DimensionPair<K, V> {
1638 pub key: K,
1639 pub value: Option<V>,
1640}
1641
1642impl<K: Default, V: Default> Default for DimensionPair<K, V> {
1643 fn default() -> Self {
1644 Self {
1645 key: Default::default(),
1646 value: Some(Default::default()),
1647 }
1648 }
1649}
1650
1651impl<K, V> cmp::Ord for DimensionPair<K, V>
1652where
1653 K: cmp::Ord,
1654{
1655 fn cmp(&self, other: &Self) -> cmp::Ordering {
1656 self.key.cmp(&other.key)
1657 }
1658}
1659
1660impl<K, V> cmp::PartialOrd for DimensionPair<K, V>
1661where
1662 K: cmp::PartialOrd,
1663{
1664 fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
1665 self.key.partial_cmp(&other.key)
1666 }
1667}
1668
1669impl<K, V> cmp::PartialEq for DimensionPair<K, V>
1670where
1671 K: cmp::PartialEq,
1672{
1673 fn eq(&self, other: &Self) -> bool {
1674 self.key.eq(&other.key)
1675 }
1676}
1677
1678impl<K, V> ops::Sub for DimensionPair<K, V>
1679where
1680 K: ops::Sub<K, Output = K>,
1681 V: ops::Sub<V, Output = V>,
1682{
1683 type Output = Self;
1684
1685 fn sub(self, rhs: Self) -> Self::Output {
1686 Self {
1687 key: self.key - rhs.key,
1688 value: self.value.zip(rhs.value).map(|(a, b)| a - b),
1689 }
1690 }
1691}
1692
1693impl<K, V> cmp::Eq for DimensionPair<K, V> where K: cmp::Eq {}
1694
1695impl<'a, K, V> sum_tree::Dimension<'a, ChunkSummary> for DimensionPair<K, V>
1696where
1697 K: sum_tree::Dimension<'a, ChunkSummary>,
1698 V: sum_tree::Dimension<'a, ChunkSummary>,
1699{
1700 fn zero(_cx: ()) -> Self {
1701 Self {
1702 key: K::zero(_cx),
1703 value: Some(V::zero(_cx)),
1704 }
1705 }
1706
1707 fn add_summary(&mut self, summary: &'a ChunkSummary, _cx: ()) {
1708 self.key.add_summary(summary, _cx);
1709 if let Some(value) = &mut self.value {
1710 value.add_summary(summary, _cx);
1711 }
1712 }
1713}
1714
1715impl<K, V> TextDimension for DimensionPair<K, V>
1716where
1717 K: TextDimension,
1718 V: TextDimension,
1719{
1720 fn add_assign(&mut self, other: &Self) {
1721 self.key.add_assign(&other.key);
1722 if let Some(value) = &mut self.value {
1723 if let Some(other_value) = other.value.as_ref() {
1724 value.add_assign(other_value);
1725 } else {
1726 self.value.take();
1727 }
1728 }
1729 }
1730
1731 fn from_chunk(chunk: ChunkSlice) -> Self {
1732 Self {
1733 key: K::from_chunk(chunk),
1734 value: Some(V::from_chunk(chunk)),
1735 }
1736 }
1737
1738 fn from_text_summary(summary: &TextSummary) -> Self {
1739 Self {
1740 key: K::from_text_summary(summary),
1741 value: Some(V::from_text_summary(summary)),
1742 }
1743 }
1744}
1745
1746#[cfg(test)]
1747mod tests {
1748 use super::*;
1749 use Bias::{Left, Right};
1750 use rand::prelude::*;
1751 use std::{cmp::Ordering, env, io::Read};
1752 use util::RandomCharIter;
1753
1754 #[ctor::ctor]
1755 fn init_logger() {
1756 zlog::init_test();
1757 }
1758
1759 #[test]
1760 fn test_all_4_byte_chars() {
1761 let mut rope = Rope::new();
1762 let text = "🏀".repeat(256);
1763 rope.push(&text);
1764 assert_eq!(rope.text(), text);
1765 }
1766
1767 #[test]
1768 fn test_clip() {
1769 let rope = Rope::from("🧘");
1770
1771 assert_eq!(rope.clip_offset(1, Bias::Left), 0);
1772 assert_eq!(rope.clip_offset(1, Bias::Right), 4);
1773 assert_eq!(rope.clip_offset(5, Bias::Right), 4);
1774
1775 assert_eq!(
1776 rope.clip_point(Point::new(0, 1), Bias::Left),
1777 Point::new(0, 0)
1778 );
1779 assert_eq!(
1780 rope.clip_point(Point::new(0, 1), Bias::Right),
1781 Point::new(0, 4)
1782 );
1783 assert_eq!(
1784 rope.clip_point(Point::new(0, 5), Bias::Right),
1785 Point::new(0, 4)
1786 );
1787
1788 assert_eq!(
1789 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Left),
1790 PointUtf16::new(0, 0)
1791 );
1792 assert_eq!(
1793 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 1)), Bias::Right),
1794 PointUtf16::new(0, 2)
1795 );
1796 assert_eq!(
1797 rope.clip_point_utf16(Unclipped(PointUtf16::new(0, 3)), Bias::Right),
1798 PointUtf16::new(0, 2)
1799 );
1800
1801 assert_eq!(
1802 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Left),
1803 OffsetUtf16(0)
1804 );
1805 assert_eq!(
1806 rope.clip_offset_utf16(OffsetUtf16(1), Bias::Right),
1807 OffsetUtf16(2)
1808 );
1809 assert_eq!(
1810 rope.clip_offset_utf16(OffsetUtf16(3), Bias::Right),
1811 OffsetUtf16(2)
1812 );
1813 }
1814
1815 #[test]
1816 fn test_prev_next_line() {
1817 let rope = Rope::from("abc\ndef\nghi\njkl");
1818
1819 let mut chunks = rope.chunks();
1820 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1821
1822 assert!(chunks.next_line());
1823 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1824
1825 assert!(chunks.next_line());
1826 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1827
1828 assert!(chunks.next_line());
1829 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1830
1831 assert!(!chunks.next_line());
1832 assert_eq!(chunks.peek(), None);
1833
1834 assert!(chunks.prev_line());
1835 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'j');
1836
1837 assert!(chunks.prev_line());
1838 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'g');
1839
1840 assert!(chunks.prev_line());
1841 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'd');
1842
1843 assert!(chunks.prev_line());
1844 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1845
1846 assert!(!chunks.prev_line());
1847 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'a');
1848
1849 // Only return true when the cursor has moved to the start of a line
1850 let mut chunks = rope.chunks_in_range(5..7);
1851 chunks.seek(6);
1852 assert!(!chunks.prev_line());
1853 assert_eq!(chunks.peek().unwrap().chars().next().unwrap(), 'e');
1854
1855 assert!(!chunks.next_line());
1856 assert_eq!(chunks.peek(), None);
1857 }
1858
1859 #[test]
1860 fn test_lines() {
1861 let rope = Rope::from("abc\ndefg\nhi");
1862 let mut lines = rope.chunks().lines();
1863 assert_eq!(lines.next(), Some("abc"));
1864 assert_eq!(lines.next(), Some("defg"));
1865 assert_eq!(lines.next(), Some("hi"));
1866 assert_eq!(lines.next(), None);
1867
1868 let rope = Rope::from("abc\ndefg\nhi\n");
1869 let mut lines = rope.chunks().lines();
1870 assert_eq!(lines.next(), Some("abc"));
1871 assert_eq!(lines.next(), Some("defg"));
1872 assert_eq!(lines.next(), Some("hi"));
1873 assert_eq!(lines.next(), Some(""));
1874 assert_eq!(lines.next(), None);
1875
1876 let rope = Rope::from("abc\ndefg\nhi");
1877 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1878 assert_eq!(lines.next(), Some("hi"));
1879 assert_eq!(lines.next(), Some("defg"));
1880 assert_eq!(lines.next(), Some("abc"));
1881 assert_eq!(lines.next(), None);
1882
1883 let rope = Rope::from("abc\ndefg\nhi\n");
1884 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1885 assert_eq!(lines.next(), Some(""));
1886 assert_eq!(lines.next(), Some("hi"));
1887 assert_eq!(lines.next(), Some("defg"));
1888 assert_eq!(lines.next(), Some("abc"));
1889 assert_eq!(lines.next(), None);
1890
1891 let rope = Rope::from("abc\nlonger line test\nhi");
1892 let mut lines = rope.chunks().lines();
1893 assert_eq!(lines.next(), Some("abc"));
1894 assert_eq!(lines.next(), Some("longer line test"));
1895 assert_eq!(lines.next(), Some("hi"));
1896 assert_eq!(lines.next(), None);
1897
1898 let rope = Rope::from("abc\nlonger line test\nhi");
1899 let mut lines = rope.reversed_chunks_in_range(0..rope.len()).lines();
1900 assert_eq!(lines.next(), Some("hi"));
1901 assert_eq!(lines.next(), Some("longer line test"));
1902 assert_eq!(lines.next(), Some("abc"));
1903 assert_eq!(lines.next(), None);
1904 }
1905
1906 #[gpui::test(iterations = 100)]
1907 fn test_random_rope(mut rng: StdRng) {
1908 let operations = env::var("OPERATIONS")
1909 .map(|i| i.parse().expect("invalid `OPERATIONS` variable"))
1910 .unwrap_or(10);
1911
1912 let mut expected = String::new();
1913 let mut actual = Rope::new();
1914 for _ in 0..operations {
1915 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1916 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1917 let len = rng.random_range(0..=64);
1918 let new_text: String = RandomCharIter::new(&mut rng).take(len).collect();
1919
1920 let mut new_actual = Rope::new();
1921 let mut cursor = actual.cursor(0);
1922 new_actual.append(cursor.slice(start_ix));
1923 new_actual.push(&new_text);
1924 cursor.seek_forward(end_ix);
1925 new_actual.append(cursor.suffix());
1926 actual = new_actual;
1927
1928 expected.replace_range(start_ix..end_ix, &new_text);
1929
1930 assert_eq!(actual.text(), expected);
1931 log::info!("text: {:?}", expected);
1932
1933 for _ in 0..5 {
1934 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
1935 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
1936
1937 let actual_text = actual.chunks_in_range(start_ix..end_ix).collect::<String>();
1938 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1939
1940 let mut actual_text = String::new();
1941 actual
1942 .bytes_in_range(start_ix..end_ix)
1943 .read_to_string(&mut actual_text)
1944 .unwrap();
1945 assert_eq!(actual_text, &expected[start_ix..end_ix]);
1946
1947 assert_eq!(
1948 actual
1949 .reversed_chunks_in_range(start_ix..end_ix)
1950 .collect::<Vec<&str>>()
1951 .into_iter()
1952 .rev()
1953 .collect::<String>(),
1954 &expected[start_ix..end_ix]
1955 );
1956
1957 let mut expected_line_starts: Vec<_> = expected[start_ix..end_ix]
1958 .match_indices('\n')
1959 .map(|(index, _)| start_ix + index + 1)
1960 .collect();
1961
1962 let mut chunks = actual.chunks_in_range(start_ix..end_ix);
1963
1964 let mut actual_line_starts = Vec::new();
1965 while chunks.next_line() {
1966 actual_line_starts.push(chunks.offset());
1967 }
1968 assert_eq!(
1969 actual_line_starts,
1970 expected_line_starts,
1971 "actual line starts != expected line starts when using next_line() for {:?} ({:?})",
1972 &expected[start_ix..end_ix],
1973 start_ix..end_ix
1974 );
1975
1976 if start_ix < end_ix
1977 && (start_ix == 0 || expected.as_bytes()[start_ix - 1] == b'\n')
1978 {
1979 expected_line_starts.insert(0, start_ix);
1980 }
1981 // Remove the last index if it starts at the end of the range.
1982 if expected_line_starts.last() == Some(&end_ix) {
1983 expected_line_starts.pop();
1984 }
1985
1986 let mut actual_line_starts = Vec::new();
1987 while chunks.prev_line() {
1988 actual_line_starts.push(chunks.offset());
1989 }
1990 actual_line_starts.reverse();
1991 assert_eq!(
1992 actual_line_starts,
1993 expected_line_starts,
1994 "actual line starts != expected line starts when using prev_line() for {:?} ({:?})",
1995 &expected[start_ix..end_ix],
1996 start_ix..end_ix
1997 );
1998
1999 // Check that next_line/prev_line work correctly from random positions
2000 let mut offset = rng.random_range(start_ix..=end_ix);
2001 while !expected.is_char_boundary(offset) {
2002 offset -= 1;
2003 }
2004 chunks.seek(offset);
2005
2006 for _ in 0..5 {
2007 if rng.random() {
2008 let expected_next_line_start = expected[offset..end_ix]
2009 .find('\n')
2010 .map(|newline_ix| offset + newline_ix + 1);
2011
2012 let moved = chunks.next_line();
2013 assert_eq!(
2014 moved,
2015 expected_next_line_start.is_some(),
2016 "unexpected result from next_line after seeking to {} in range {:?} ({:?})",
2017 offset,
2018 start_ix..end_ix,
2019 &expected[start_ix..end_ix]
2020 );
2021 if let Some(expected_next_line_start) = expected_next_line_start {
2022 assert_eq!(
2023 chunks.offset(),
2024 expected_next_line_start,
2025 "invalid position after seeking to {} in range {:?} ({:?})",
2026 offset,
2027 start_ix..end_ix,
2028 &expected[start_ix..end_ix]
2029 );
2030 } else {
2031 assert_eq!(
2032 chunks.offset(),
2033 end_ix,
2034 "invalid position after seeking to {} in range {:?} ({:?})",
2035 offset,
2036 start_ix..end_ix,
2037 &expected[start_ix..end_ix]
2038 );
2039 }
2040 } else {
2041 let search_end = if offset > 0 && expected.as_bytes()[offset - 1] == b'\n' {
2042 offset - 1
2043 } else {
2044 offset
2045 };
2046
2047 let expected_prev_line_start = expected[..search_end]
2048 .rfind('\n')
2049 .and_then(|newline_ix| {
2050 let line_start_ix = newline_ix + 1;
2051 if line_start_ix >= start_ix {
2052 Some(line_start_ix)
2053 } else {
2054 None
2055 }
2056 })
2057 .or({
2058 if offset > 0 && start_ix == 0 {
2059 Some(0)
2060 } else {
2061 None
2062 }
2063 });
2064
2065 let moved = chunks.prev_line();
2066 assert_eq!(
2067 moved,
2068 expected_prev_line_start.is_some(),
2069 "unexpected result from prev_line after seeking to {} in range {:?} ({:?})",
2070 offset,
2071 start_ix..end_ix,
2072 &expected[start_ix..end_ix]
2073 );
2074 if let Some(expected_prev_line_start) = expected_prev_line_start {
2075 assert_eq!(
2076 chunks.offset(),
2077 expected_prev_line_start,
2078 "invalid position after seeking to {} in range {:?} ({:?})",
2079 offset,
2080 start_ix..end_ix,
2081 &expected[start_ix..end_ix]
2082 );
2083 } else {
2084 assert_eq!(
2085 chunks.offset(),
2086 start_ix,
2087 "invalid position after seeking to {} in range {:?} ({:?})",
2088 offset,
2089 start_ix..end_ix,
2090 &expected[start_ix..end_ix]
2091 );
2092 }
2093 }
2094
2095 assert!((start_ix..=end_ix).contains(&chunks.offset()));
2096 if rng.random() {
2097 offset = rng.random_range(start_ix..=end_ix);
2098 while !expected.is_char_boundary(offset) {
2099 offset -= 1;
2100 }
2101 chunks.seek(offset);
2102 } else {
2103 chunks.next();
2104 offset = chunks.offset();
2105 assert!((start_ix..=end_ix).contains(&chunks.offset()));
2106 }
2107 }
2108 }
2109
2110 let mut offset_utf16 = OffsetUtf16(0);
2111 let mut point = Point::new(0, 0);
2112 let mut point_utf16 = PointUtf16::new(0, 0);
2113 for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
2114 assert_eq!(actual.offset_to_point(ix), point, "offset_to_point({})", ix);
2115 assert_eq!(
2116 actual.offset_to_point_utf16(ix),
2117 point_utf16,
2118 "offset_to_point_utf16({})",
2119 ix
2120 );
2121 assert_eq!(
2122 actual.point_to_offset(point),
2123 ix,
2124 "point_to_offset({:?})",
2125 point
2126 );
2127 assert_eq!(
2128 actual.point_utf16_to_offset(point_utf16),
2129 ix,
2130 "point_utf16_to_offset({:?})",
2131 point_utf16
2132 );
2133 assert_eq!(
2134 actual.offset_to_offset_utf16(ix),
2135 offset_utf16,
2136 "offset_to_offset_utf16({:?})",
2137 ix
2138 );
2139 assert_eq!(
2140 actual.offset_utf16_to_offset(offset_utf16),
2141 ix,
2142 "offset_utf16_to_offset({:?})",
2143 offset_utf16
2144 );
2145 if ch == '\n' {
2146 point += Point::new(1, 0);
2147 point_utf16 += PointUtf16::new(1, 0);
2148 } else {
2149 point.column += ch.len_utf8() as u32;
2150 point_utf16.column += ch.len_utf16() as u32;
2151 }
2152 offset_utf16.0 += ch.len_utf16();
2153 }
2154
2155 let mut offset_utf16 = OffsetUtf16(0);
2156 let mut point_utf16 = Unclipped(PointUtf16::zero());
2157 for unit in expected.encode_utf16() {
2158 let left_offset = actual.clip_offset_utf16(offset_utf16, Bias::Left);
2159 let right_offset = actual.clip_offset_utf16(offset_utf16, Bias::Right);
2160 assert!(right_offset >= left_offset);
2161 // Ensure translating UTF-16 offsets to UTF-8 offsets doesn't panic.
2162 actual.offset_utf16_to_offset(left_offset);
2163 actual.offset_utf16_to_offset(right_offset);
2164
2165 let left_point = actual.clip_point_utf16(point_utf16, Bias::Left);
2166 let right_point = actual.clip_point_utf16(point_utf16, Bias::Right);
2167 assert!(right_point >= left_point);
2168 // Ensure translating valid UTF-16 points to offsets doesn't panic.
2169 actual.point_utf16_to_offset(left_point);
2170 actual.point_utf16_to_offset(right_point);
2171
2172 offset_utf16.0 += 1;
2173 if unit == b'\n' as u16 {
2174 point_utf16.0 += PointUtf16::new(1, 0);
2175 } else {
2176 point_utf16.0 += PointUtf16::new(0, 1);
2177 }
2178 }
2179
2180 for _ in 0..5 {
2181 let end_ix = clip_offset(&expected, rng.random_range(0..=expected.len()), Right);
2182 let start_ix = clip_offset(&expected, rng.random_range(0..=end_ix), Left);
2183 assert_eq!(
2184 actual.cursor(start_ix).summary::<TextSummary>(end_ix),
2185 TextSummary::from(&expected[start_ix..end_ix])
2186 );
2187 }
2188
2189 let mut expected_longest_rows = Vec::new();
2190 let mut longest_line_len = -1_isize;
2191 for (row, line) in expected.split('\n').enumerate() {
2192 let row = row as u32;
2193 assert_eq!(
2194 actual.line_len(row),
2195 line.len() as u32,
2196 "invalid line len for row {}",
2197 row
2198 );
2199
2200 let line_char_count = line.chars().count() as isize;
2201 match line_char_count.cmp(&longest_line_len) {
2202 Ordering::Less => {}
2203 Ordering::Equal => expected_longest_rows.push(row),
2204 Ordering::Greater => {
2205 longest_line_len = line_char_count;
2206 expected_longest_rows.clear();
2207 expected_longest_rows.push(row);
2208 }
2209 }
2210 }
2211
2212 let longest_row = actual.summary().longest_row;
2213 assert!(
2214 expected_longest_rows.contains(&longest_row),
2215 "incorrect longest row {}. expected {:?} with length {}",
2216 longest_row,
2217 expected_longest_rows,
2218 longest_line_len,
2219 );
2220 }
2221 }
2222
2223 #[test]
2224 fn test_chunks_equals_str() {
2225 let text = "This is a multi-chunk\n& multi-line test string!";
2226 let rope = Rope::from(text);
2227 for start in 0..text.len() {
2228 for end in start..text.len() {
2229 let range = start..end;
2230 let correct_substring = &text[start..end];
2231
2232 // Test that correct range returns true
2233 assert!(
2234 rope.chunks_in_range(range.clone())
2235 .equals_str(correct_substring)
2236 );
2237 assert!(
2238 rope.reversed_chunks_in_range(range.clone())
2239 .equals_str(correct_substring)
2240 );
2241
2242 // Test that all other ranges return false (unless they happen to match)
2243 for other_start in 0..text.len() {
2244 for other_end in other_start..text.len() {
2245 if other_start == start && other_end == end {
2246 continue;
2247 }
2248 let other_substring = &text[other_start..other_end];
2249
2250 // Only assert false if the substrings are actually different
2251 if other_substring == correct_substring {
2252 continue;
2253 }
2254 assert!(
2255 !rope
2256 .chunks_in_range(range.clone())
2257 .equals_str(other_substring)
2258 );
2259 assert!(
2260 !rope
2261 .reversed_chunks_in_range(range.clone())
2262 .equals_str(other_substring)
2263 );
2264 }
2265 }
2266 }
2267 }
2268
2269 let rope = Rope::from("");
2270 assert!(rope.chunks_in_range(0..0).equals_str(""));
2271 assert!(rope.reversed_chunks_in_range(0..0).equals_str(""));
2272 assert!(!rope.chunks_in_range(0..0).equals_str("foo"));
2273 assert!(!rope.reversed_chunks_in_range(0..0).equals_str("foo"));
2274 }
2275
2276 #[test]
2277 fn test_is_char_boundary() {
2278 let fixture = "地";
2279 let rope = Rope::from("地");
2280 for b in 0..=fixture.len() {
2281 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2282 }
2283 let fixture = "";
2284 let rope = Rope::from("");
2285 for b in 0..=fixture.len() {
2286 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2287 }
2288 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2289 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2290 for b in 0..=fixture.len() {
2291 assert_eq!(rope.is_char_boundary(b), fixture.is_char_boundary(b));
2292 }
2293 }
2294
2295 #[test]
2296 fn test_floor_char_boundary() {
2297 // polyfill of str::floor_char_boundary
2298 fn floor_char_boundary(str: &str, index: usize) -> usize {
2299 if index >= str.len() {
2300 str.len()
2301 } else {
2302 let lower_bound = index.saturating_sub(3);
2303 let new_index = str.as_bytes()[lower_bound..=index]
2304 .iter()
2305 .rposition(|b| (*b as i8) >= -0x40);
2306
2307 lower_bound + new_index.unwrap()
2308 }
2309 }
2310
2311 let fixture = "地";
2312 let rope = Rope::from("地");
2313 for b in 0..=fixture.len() {
2314 assert_eq!(
2315 rope.floor_char_boundary(b),
2316 floor_char_boundary(&fixture, b)
2317 );
2318 }
2319
2320 let fixture = "";
2321 let rope = Rope::from("");
2322 for b in 0..=fixture.len() {
2323 assert_eq!(
2324 rope.floor_char_boundary(b),
2325 floor_char_boundary(&fixture, b)
2326 );
2327 }
2328
2329 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2330 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2331 for b in 0..=fixture.len() {
2332 assert_eq!(
2333 rope.floor_char_boundary(b),
2334 floor_char_boundary(&fixture, b)
2335 );
2336 }
2337 }
2338
2339 #[test]
2340 fn test_ceil_char_boundary() {
2341 // polyfill of str::ceil_char_boundary
2342 fn ceil_char_boundary(str: &str, index: usize) -> usize {
2343 if index > str.len() {
2344 str.len()
2345 } else {
2346 let upper_bound = Ord::min(index + 4, str.len());
2347 str.as_bytes()[index..upper_bound]
2348 .iter()
2349 .position(|b| (*b as i8) >= -0x40)
2350 .map_or(upper_bound, |pos| pos + index)
2351 }
2352 }
2353
2354 let fixture = "地";
2355 let rope = Rope::from("地");
2356 for b in 0..=fixture.len() {
2357 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2358 }
2359
2360 let fixture = "";
2361 let rope = Rope::from("");
2362 for b in 0..=fixture.len() {
2363 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2364 }
2365
2366 let fixture = "🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩";
2367 let rope = Rope::from("🔴🟠🟡🟢🔵🟣⚫️⚪️🟤\n🏳️⚧️🏁🏳️🌈🏴☠️⛳️📬📭🏴🏳️🚩");
2368 for b in 0..=fixture.len() {
2369 assert_eq!(rope.ceil_char_boundary(b), ceil_char_boundary(&fixture, b));
2370 }
2371 }
2372
2373 #[test]
2374 fn test_to_string_with_line_ending() {
2375 // Test Unix line endings (no conversion)
2376 let rope = Rope::from("line1\nline2\nline3");
2377 assert_eq!(
2378 rope.to_string_with_line_ending(LineEnding::Unix),
2379 "line1\nline2\nline3"
2380 );
2381
2382 // Test Windows line endings
2383 assert_eq!(
2384 rope.to_string_with_line_ending(LineEnding::Windows),
2385 "line1\r\nline2\r\nline3"
2386 );
2387
2388 // Test empty rope
2389 let empty_rope = Rope::from("");
2390 assert_eq!(
2391 empty_rope.to_string_with_line_ending(LineEnding::Windows),
2392 ""
2393 );
2394
2395 // Test single line (no newlines)
2396 let single_line = Rope::from("single line");
2397 assert_eq!(
2398 single_line.to_string_with_line_ending(LineEnding::Windows),
2399 "single line"
2400 );
2401
2402 // Test rope ending with newline
2403 let ending_newline = Rope::from("line1\nline2\n");
2404 assert_eq!(
2405 ending_newline.to_string_with_line_ending(LineEnding::Windows),
2406 "line1\r\nline2\r\n"
2407 );
2408
2409 // Test large rope with multiple chunks
2410 let mut large_rope = Rope::new();
2411 for i in 0..100 {
2412 large_rope.push(&format!("line{}\n", i));
2413 }
2414 let result = large_rope.to_string_with_line_ending(LineEnding::Windows);
2415 assert!(result.contains("\r\n"));
2416 assert!(!result.contains("\n\n"));
2417 assert_eq!(result.matches("\r\n").count(), 100);
2418 }
2419
2420 fn clip_offset(text: &str, mut offset: usize, bias: Bias) -> usize {
2421 while !text.is_char_boundary(offset) {
2422 match bias {
2423 Bias::Left => offset -= 1,
2424 Bias::Right => offset += 1,
2425 }
2426 }
2427 offset
2428 }
2429
2430 impl Rope {
2431 fn text(&self) -> String {
2432 let mut text = String::new();
2433 for chunk in self.chunks.cursor::<()>(()) {
2434 text.push_str(&chunk.text);
2435 }
2436 text
2437 }
2438 }
2439}