text.rs

  1use super::Point;
  2use crate::sum_tree::{self, SeekBias, SumTree};
  3use arrayvec::ArrayVec;
  4use std::{
  5    cmp,
  6    fmt::{self, Debug},
  7    ops::{Bound, Index, Range, RangeBounds},
  8    sync::Arc,
  9};
 10
 11#[derive(Copy, Clone, Debug, Eq, PartialEq)]
 12enum Run {
 13    Newline,
 14    Chars { len: usize, char_size: u8 },
 15}
 16
 17#[derive(Clone, Copy, Debug, Default, Eq, Ord, PartialEq, PartialOrd)]
 18struct ByteOffset(usize);
 19
 20impl sum_tree::Item for Run {
 21    type Summary = TextSummary;
 22
 23    fn summary(&self) -> Self::Summary {
 24        match *self {
 25            Run::Newline => TextSummary {
 26                chars: 1,
 27                bytes: 1,
 28                lines: Point::new(1, 0),
 29                first_line_len: 0,
 30                rightmost_point: Point::new(0, 0),
 31            },
 32            Run::Chars { len, char_size } => TextSummary {
 33                chars: len,
 34                bytes: len * char_size as usize,
 35                lines: Point::new(0, len as u32),
 36                first_line_len: len as u32,
 37                rightmost_point: Point::new(0, len as u32),
 38            },
 39        }
 40    }
 41}
 42
 43impl Run {
 44    fn char_size(&self) -> u8 {
 45        match self {
 46            Run::Newline => 1,
 47            Run::Chars { char_size, .. } => *char_size,
 48        }
 49    }
 50}
 51
 52#[derive(Clone, Debug, Default, Eq, PartialEq)]
 53pub struct TextSummary {
 54    pub chars: usize,
 55    pub bytes: usize,
 56    pub lines: Point,
 57    pub first_line_len: u32,
 58    pub rightmost_point: Point,
 59}
 60
 61impl sum_tree::Summary for TextSummary {
 62    type Context = ();
 63
 64    fn add_summary(&mut self, other: &Self, _: &()) {
 65        *self += other;
 66    }
 67}
 68
 69impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
 70    fn add_assign(&mut self, other: &'a Self) {
 71        let joined_line_len = self.lines.column + other.first_line_len;
 72        if joined_line_len > self.rightmost_point.column {
 73            self.rightmost_point = Point::new(self.lines.row, joined_line_len);
 74        }
 75        if other.rightmost_point.column > self.rightmost_point.column {
 76            self.rightmost_point = self.lines + &other.rightmost_point;
 77        }
 78
 79        if self.lines.row == 0 {
 80            self.first_line_len += other.first_line_len;
 81        }
 82
 83        self.chars += other.chars;
 84        self.bytes += other.bytes;
 85        self.lines += &other.lines;
 86    }
 87}
 88
 89impl std::ops::AddAssign<Self> for TextSummary {
 90    fn add_assign(&mut self, other: Self) {
 91        *self += &other;
 92    }
 93}
 94
 95impl<'a> sum_tree::Dimension<'a, TextSummary> for TextSummary {
 96    fn add_summary(&mut self, other: &TextSummary) {
 97        *self += other;
 98    }
 99}
100
101impl<'a> sum_tree::Dimension<'a, TextSummary> for Point {
102    fn add_summary(&mut self, summary: &TextSummary) {
103        *self += &summary.lines;
104    }
105}
106
107impl<'a> sum_tree::Dimension<'a, TextSummary> for ByteOffset {
108    fn add_summary(&mut self, summary: &TextSummary) {
109        self.0 += summary.bytes
110    }
111}
112
113impl<'a> sum_tree::Dimension<'a, TextSummary> for usize {
114    fn add_summary(&mut self, summary: &TextSummary) {
115        *self += summary.chars;
116    }
117}
118
119#[derive(Clone)]
120pub struct Text {
121    text: Arc<str>,
122    runs: SumTree<Run>,
123    range: Range<usize>,
124}
125
126impl From<String> for Text {
127    fn from(text: String) -> Self {
128        Self::from(Arc::from(text))
129    }
130}
131
132impl<'a> From<&'a str> for Text {
133    fn from(text: &'a str) -> Self {
134        Self::from(Arc::from(text))
135    }
136}
137
138impl From<Arc<str>> for Text {
139    fn from(text: Arc<str>) -> Self {
140        let mut runs = Vec::new();
141
142        let mut chars_len = 0;
143        let mut run_char_size = 0;
144        let mut run_chars = 0;
145
146        let mut chars = text.chars();
147        loop {
148            let ch = chars.next();
149            let ch_size = ch.map_or(0, |ch| ch.len_utf8());
150            if run_chars != 0 && (ch.is_none() || ch == Some('\n') || run_char_size != ch_size) {
151                runs.push(Run::Chars {
152                    len: run_chars,
153                    char_size: run_char_size as u8,
154                });
155                run_chars = 0;
156            }
157            run_char_size = ch_size;
158
159            match ch {
160                Some('\n') => runs.push(Run::Newline),
161                Some(_) => run_chars += 1,
162                None => break,
163            }
164            chars_len += 1;
165        }
166
167        let mut tree = SumTree::new();
168        tree.extend(runs, &());
169        Text {
170            text,
171            runs: tree,
172            range: 0..chars_len,
173        }
174    }
175}
176
177impl Debug for Text {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        f.debug_tuple("Text").field(&self.as_str()).finish()
180    }
181}
182
183impl PartialEq for Text {
184    fn eq(&self, other: &Self) -> bool {
185        self.text == other.text
186    }
187}
188
189impl Eq for Text {}
190
191impl<T: RangeBounds<usize>> Index<T> for Text {
192    type Output = str;
193
194    fn index(&self, range: T) -> &Self::Output {
195        let start = match range.start_bound() {
196            Bound::Included(start) => cmp::min(self.range.start + start, self.range.end),
197            Bound::Excluded(_) => unimplemented!(),
198            Bound::Unbounded => self.range.start,
199        };
200        let end = match range.end_bound() {
201            Bound::Included(end) => cmp::min(self.range.start + end + 1, self.range.end),
202            Bound::Excluded(end) => cmp::min(self.range.start + end, self.range.end),
203            Bound::Unbounded => self.range.end,
204        };
205
206        let byte_start = self.abs_byte_offset_for_offset(start);
207        let byte_end = self.abs_byte_offset_for_offset(end);
208        &self.text[byte_start..byte_end]
209    }
210}
211
212impl Text {
213    pub fn range(&self) -> Range<usize> {
214        self.range.clone()
215    }
216
217    pub fn as_str(&self) -> &str {
218        &self[..]
219    }
220
221    pub fn slice<T: RangeBounds<usize>>(&self, range: T) -> Text {
222        let start = match range.start_bound() {
223            Bound::Included(start) => cmp::min(self.range.start + start, self.range.end),
224            Bound::Excluded(_) => unimplemented!(),
225            Bound::Unbounded => self.range.start,
226        };
227        let end = match range.end_bound() {
228            Bound::Included(end) => cmp::min(self.range.start + end + 1, self.range.end),
229            Bound::Excluded(end) => cmp::min(self.range.start + end, self.range.end),
230            Bound::Unbounded => self.range.end,
231        };
232
233        Text {
234            text: self.text.clone(),
235            runs: self.runs.clone(),
236            range: start..end,
237        }
238    }
239
240    pub fn line_len(&self, row: u32) -> u32 {
241        let mut cursor = self.runs.cursor::<usize, Point>();
242        cursor.seek(&self.range.start, SeekBias::Right, &());
243        let absolute_row = cursor.start().row + row;
244
245        let mut cursor = self.runs.cursor::<Point, usize>();
246        cursor.seek(&Point::new(absolute_row, 0), SeekBias::Right, &());
247        let prefix_len = self.range.start.saturating_sub(*cursor.start());
248        let line_len =
249            cursor.summary::<usize>(&Point::new(absolute_row + 1, 0), SeekBias::Left, &());
250        let suffix_len = cursor.start().saturating_sub(self.range.end);
251
252        line_len
253            .saturating_sub(prefix_len)
254            .saturating_sub(suffix_len) as u32
255    }
256
257    pub fn len(&self) -> usize {
258        self.range.end - self.range.start
259    }
260
261    pub fn lines(&self) -> Point {
262        self.abs_point_for_offset(self.range.end) - &self.abs_point_for_offset(self.range.start)
263    }
264
265    pub fn rightmost_point(&self) -> Point {
266        let lines = self.lines();
267
268        let mut candidates = ArrayVec::<[Point; 3]>::new();
269        candidates.push(lines);
270        if lines.row > 0 {
271            candidates.push(Point::new(0, self.line_len(0)));
272            if lines.row > 1 {
273                let mut cursor = self.runs.cursor::<usize, Point>();
274                cursor.seek(&self.range.start, SeekBias::Right, &());
275                let absolute_start_row = cursor.start().row;
276
277                let mut cursor = self.runs.cursor::<Point, usize>();
278                cursor.seek(&Point::new(absolute_start_row + 1, 0), SeekBias::Right, &());
279                let summary = cursor.summary::<TextSummary>(
280                    &Point::new(absolute_start_row + lines.row, 0),
281                    SeekBias::Left,
282                    &(),
283                );
284
285                candidates.push(Point::new(1, 0) + &summary.rightmost_point);
286            }
287        }
288
289        candidates.into_iter().max_by_key(|p| p.column).unwrap()
290    }
291
292    pub fn point_for_offset(&self, offset: usize) -> Point {
293        self.abs_point_for_offset(self.range.start + offset)
294            - &self.abs_point_for_offset(self.range.start)
295    }
296
297    pub fn offset_for_point(&self, point: Point) -> usize {
298        let mut cursor = self.runs.cursor::<Point, TextSummary>();
299        let abs_point = self.abs_point_for_offset(self.range.start) + &point;
300        cursor.seek(&abs_point, SeekBias::Right, &());
301        let overshoot = abs_point - &cursor.start().lines;
302        let abs_offset = cursor.start().chars + overshoot.column as usize;
303        abs_offset - self.range.start
304    }
305
306    pub fn summary(&self) -> TextSummary {
307        TextSummary {
308            chars: self.range.end - self.range.start,
309            bytes: self.abs_byte_offset_for_offset(self.range.end)
310                - self.abs_byte_offset_for_offset(self.range.start),
311            lines: self.abs_point_for_offset(self.range.end)
312                - &self.abs_point_for_offset(self.range.start),
313            first_line_len: self.line_len(0),
314            rightmost_point: self.rightmost_point(),
315        }
316    }
317
318    fn abs_point_for_offset(&self, offset: usize) -> Point {
319        let mut cursor = self.runs.cursor::<usize, TextSummary>();
320        cursor.seek(&offset, SeekBias::Right, &());
321        let overshoot = (offset - cursor.start().chars) as u32;
322        cursor.start().lines + &Point::new(0, overshoot)
323    }
324
325    fn abs_byte_offset_for_offset(&self, offset: usize) -> usize {
326        let mut cursor = self.runs.cursor::<usize, TextSummary>();
327        cursor.seek(&offset, SeekBias::Right, &());
328        let overshoot = offset - cursor.start().chars;
329        cursor.start().bytes + overshoot * cursor.item().map_or(0, |run| run.char_size()) as usize
330    }
331}
332
333#[cfg(test)]
334mod tests {
335    use super::*;
336    use std::collections::HashSet;
337    use std::iter::FromIterator;
338
339    #[test]
340    fn test_basic() {
341        let text = Text::from(String::from("ab\ncd€\nfghij\nkl¢m"));
342        assert_eq!(text.len(), 17);
343        assert_eq!(text.as_str(), "ab\ncd€\nfghij\nkl¢m");
344        assert_eq!(text.lines(), Point::new(3, 4));
345        assert_eq!(text.line_len(0), 2);
346        assert_eq!(text.line_len(1), 3);
347        assert_eq!(text.line_len(2), 5);
348        assert_eq!(text.line_len(3), 4);
349        assert_eq!(text.rightmost_point(), Point::new(2, 5));
350
351        let b_to_g = text.slice(1..9);
352        assert_eq!(b_to_g.as_str(), "b\ncd€\nfg");
353        assert_eq!(b_to_g.len(), 8);
354        assert_eq!(b_to_g.lines(), Point::new(2, 2));
355        assert_eq!(b_to_g.line_len(0), 1);
356        assert_eq!(b_to_g.line_len(1), 3);
357        assert_eq!(b_to_g.line_len(2), 2);
358        assert_eq!(b_to_g.line_len(3), 0);
359        assert_eq!(b_to_g.rightmost_point(), Point::new(1, 3));
360
361        let d_to_i = text.slice(4..11);
362        assert_eq!(d_to_i.as_str(), "d€\nfghi");
363        assert_eq!(&d_to_i[1..5], "\nfg");
364        assert_eq!(d_to_i.len(), 7);
365        assert_eq!(d_to_i.lines(), Point::new(1, 4));
366        assert_eq!(d_to_i.line_len(0), 2);
367        assert_eq!(d_to_i.line_len(1), 4);
368        assert_eq!(d_to_i.line_len(2), 0);
369        assert_eq!(d_to_i.rightmost_point(), Point::new(1, 4));
370
371        let d_to_j = text.slice(4..=11);
372        assert_eq!(d_to_j.as_str(), "d€\nfghij");
373        assert_eq!(&d_to_j[1..], "\nfghij");
374        assert_eq!(d_to_j.len(), 8);
375    }
376
377    #[test]
378    fn test_random() {
379        use rand::prelude::*;
380
381        for seed in 0..100 {
382            println!("buffer::text seed: {}", seed);
383            let rng = &mut StdRng::seed_from_u64(seed);
384
385            let len = rng.gen_range(0..50);
386            let mut string = String::new();
387            for _ in 0..len {
388                if rng.gen_ratio(1, 5) {
389                    string.push('\n');
390                } else {
391                    string.push(rng.gen());
392                }
393            }
394            let text = Text::from(string.clone());
395
396            for _ in 0..10 {
397                let start = rng.gen_range(0..text.len() + 1);
398                let end = rng.gen_range(start..text.len() + 2);
399
400                let string_slice = string
401                    .chars()
402                    .skip(start)
403                    .take(end - start)
404                    .collect::<String>();
405                let expected_line_endpoints = string_slice
406                    .split('\n')
407                    .enumerate()
408                    .map(|(row, line)| Point::new(row as u32, line.chars().count() as u32))
409                    .collect::<Vec<_>>();
410                let text_slice = text.slice(start..end);
411
412                assert_eq!(text_slice.lines(), lines(&string_slice));
413
414                let mut rightmost_points: HashSet<Point> = HashSet::new();
415                for endpoint in &expected_line_endpoints {
416                    if let Some(rightmost_point) = rightmost_points.iter().next().cloned() {
417                        if endpoint.column > rightmost_point.column {
418                            rightmost_points.clear();
419                        }
420                        if endpoint.column >= rightmost_point.column {
421                            rightmost_points.insert(*endpoint);
422                        }
423                    } else {
424                        rightmost_points.insert(*endpoint);
425                    }
426
427                    assert_eq!(text_slice.line_len(endpoint.row as u32), endpoint.column);
428                }
429
430                assert!(rightmost_points.contains(&text_slice.rightmost_point()));
431
432                for _ in 0..10 {
433                    let offset = rng.gen_range(0..string_slice.chars().count() + 1);
434                    let point = lines(&string_slice.chars().take(offset).collect::<String>());
435                    assert_eq!(text_slice.point_for_offset(offset), point);
436                    assert_eq!(text_slice.offset_for_point(point), offset);
437                    if offset < string_slice.chars().count() {
438                        assert_eq!(
439                            &text_slice[offset..offset + 1],
440                            String::from_iter(string_slice.chars().nth(offset)).as_str()
441                        );
442                    }
443                }
444            }
445        }
446    }
447
448    pub fn lines(s: &str) -> Point {
449        let mut row = 0;
450        let mut column = 0;
451        for ch in s.chars() {
452            if ch == '\n' {
453                row += 1;
454                column = 0;
455            } else {
456                column += 1;
457            }
458        }
459        Point::new(row, column)
460    }
461}