Cargo.lock 🔗
@@ -9898,6 +9898,7 @@ dependencies = [
"gpui",
"log",
"rand 0.8.5",
+ "rayon",
"smallvec",
"sum_tree",
"unicode-segmentation",
Created by Antonio Scandurra.
This pull request introduces a per-chunk index of Unicode code points,
newlines, and UTF-16 code units.
Benchmarks worth a thousand words:
```
push/4096 time: [467.06 µs 470.07 µs 473.24 µs]
thrpt: [8.2543 MiB/s 8.3100 MiB/s 8.3635 MiB/s]
change:
time: [-4.1462% -3.0990% -2.0527%] (p = 0.00 < 0.05)
thrpt: [+2.0957% +3.1981% +4.3255%]
Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
1 (1.00%) low mild
2 (2.00%) high mild
push/65536 time: [1.4650 ms 1.4796 ms 1.4922 ms]
thrpt: [41.885 MiB/s 42.242 MiB/s 42.664 MiB/s]
change:
time: [-3.2871% -2.3489% -1.4555%] (p = 0.00 < 0.05)
thrpt: [+1.4770% +2.4054% +3.3988%]
Performance has improved.
Found 6 outliers among 100 measurements (6.00%)
3 (3.00%) low severe
3 (3.00%) low mild
append/4096 time: [729.00 ns 730.57 ns 732.14 ns]
thrpt: [5.2103 GiB/s 5.2215 GiB/s 5.2327 GiB/s]
change:
time: [-81.884% -81.836% -81.790%] (p = 0.00 < 0.05)
thrpt: [+449.16% +450.53% +452.01%]
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
3 (3.00%) low mild
6 (6.00%) high mild
2 (2.00%) high severe
append/65536 time: [504.44 ns 505.58 ns 506.77 ns]
thrpt: [120.44 GiB/s 120.72 GiB/s 121.00 GiB/s]
change:
time: [-94.833% -94.807% -94.782%] (p = 0.00 < 0.05)
thrpt: [+1816.3% +1825.8% +1835.5%]
Performance has improved.
Found 4 outliers among 100 measurements (4.00%)
3 (3.00%) high mild
1 (1.00%) high severe
slice/4096 time: [29.661 µs 29.733 µs 29.816 µs]
thrpt: [131.01 MiB/s 131.38 MiB/s 131.70 MiB/s]
change:
time: [-48.833% -48.533% -48.230%] (p = 0.00 < 0.05)
thrpt: [+93.161% +94.298% +95.440%]
Performance has improved.
slice/65536 time: [588.00 µs 590.22 µs 592.17 µs]
thrpt: [105.54 MiB/s 105.89 MiB/s 106.29 MiB/s]
change:
time: [-45.599% -45.347% -45.099%] (p = 0.00 < 0.05)
thrpt: [+82.147% +82.971% +83.821%]
Performance has improved.
Found 2 outliers among 100 measurements (2.00%)
1 (1.00%) low severe
1 (1.00%) high mild
bytes_in_range/4096 time: [3.8630 µs 3.8811 µs 3.8994 µs]
thrpt: [1001.8 MiB/s 1006.5 MiB/s 1011.2 MiB/s]
change:
time: [+0.0600% +0.6000% +1.1833%] (p = 0.03 < 0.05)
thrpt: [-1.1695% -0.5964% -0.0600%]
Change within noise threshold.
bytes_in_range/65536 time: [98.178 µs 98.545 µs 98.931 µs]
thrpt: [631.75 MiB/s 634.23 MiB/s 636.60 MiB/s]
change:
time: [-0.6513% +0.7537% +2.2265%] (p = 0.30 > 0.05)
thrpt: [-2.1780% -0.7481% +0.6555%]
No change in performance detected.
Found 11 outliers among 100 measurements (11.00%)
8 (8.00%) high mild
3 (3.00%) high severe
chars/4096 time: [878.91 ns 879.45 ns 880.06 ns]
thrpt: [4.3346 GiB/s 4.3376 GiB/s 4.3403 GiB/s]
change:
time: [+9.1679% +9.4000% +9.6304%] (p = 0.00 < 0.05)
thrpt: [-8.7844% -8.5923% -8.3979%]
Performance has regressed.
Found 8 outliers among 100 measurements (8.00%)
1 (1.00%) low severe
1 (1.00%) low mild
3 (3.00%) high mild
3 (3.00%) high severe
chars/65536 time: [15.615 µs 15.691 µs 15.757 µs]
thrpt: [3.8735 GiB/s 3.8899 GiB/s 3.9087 GiB/s]
change:
time: [+5.4902% +5.9345% +6.4044%] (p = 0.00 < 0.05)
thrpt: [-6.0190% -5.6021% -5.2045%]
Performance has regressed.
Found 2 outliers among 100 measurements (2.00%)
2 (2.00%) low mild
clip_point/4096 time: [29.677 µs 29.835 µs 30.019 µs]
thrpt: [130.13 MiB/s 130.93 MiB/s 131.63 MiB/s]
change:
time: [-46.306% -45.866% -45.436%] (p = 0.00 < 0.05)
thrpt: [+83.272% +84.728% +86.240%]
Performance has improved.
Found 11 outliers among 100 measurements (11.00%)
3 (3.00%) high mild
8 (8.00%) high severe
clip_point/65536 time: [1.5933 ms 1.6116 ms 1.6311 ms]
thrpt: [38.318 MiB/s 38.782 MiB/s 39.226 MiB/s]
change:
time: [-30.388% -29.598% -28.717%] (p = 0.00 < 0.05)
thrpt: [+40.286% +42.040% +43.653%]
Performance has improved.
Found 3 outliers among 100 measurements (3.00%)
3 (3.00%) high mild
running 0 tests
test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured; 7 filtered out; finished in 0.00s
point_to_offset/4096 time: [14.493 µs 14.591 µs 14.707 µs]
thrpt: [265.61 MiB/s 267.72 MiB/s 269.52 MiB/s]
change:
time: [-71.990% -71.787% -71.588%] (p = 0.00 < 0.05)
thrpt: [+251.96% +254.45% +257.01%]
Performance has improved.
Found 9 outliers among 100 measurements (9.00%)
5 (5.00%) high mild
4 (4.00%) high severe
point_to_offset/65536 time: [700.72 µs 713.75 µs 727.26 µs]
thrpt: [85.939 MiB/s 87.566 MiB/s 89.194 MiB/s]
change:
time: [-61.778% -61.015% -60.256%] (p = 0.00 < 0.05)
thrpt: [+151.61% +156.51% +161.63%]
Performance has improved.
```
Calling `Rope::chars` got slightly slower but I don't think it's a big
issue (we don't really call `chars` for an entire `Rope`).
In a future pull request, I want to use the tab index (which we're not
yet using) and the char index to make `TabMap` a lot faster.
Release Notes:
- N/A
Cargo.lock | 1
Cargo.toml | 1
crates/rope/Cargo.toml | 1
crates/rope/benches/rope_benchmark.rs | 19
crates/rope/src/chunk.rs | 878 +++++++++++++++++++++++++++++
crates/rope/src/rope.rs | 446 ++++----------
crates/rope/src/unclipped.rs | 6
crates/sum_tree/Cargo.toml | 2
8 files changed, 1,029 insertions(+), 325 deletions(-)
@@ -9898,6 +9898,7 @@ dependencies = [
"gpui",
"log",
"rand 0.8.5",
+ "rayon",
"smallvec",
"sum_tree",
"unicode-segmentation",
@@ -392,6 +392,7 @@ prost-build = "0.9"
prost-types = "0.9"
pulldown-cmark = { version = "0.12.0", default-features = false }
rand = "0.8.5"
+rayon = "1.8"
regex = "1.5"
repair_json = "0.1.0"
reqwest = { git = "https://github.com/zed-industries/reqwest.git", rev = "fd110f6998da16bbca97b6dddda9be7827c50e29", default-features = false, features = [
@@ -14,6 +14,7 @@ path = "src/rope.rs"
[dependencies]
arrayvec = "0.7.1"
log.workspace = true
+rayon.workspace = true
smallvec.workspace = true
sum_tree.workspace = true
unicode-segmentation.workspace = true
@@ -171,6 +171,25 @@ fn rope_benchmarks(c: &mut Criterion) {
});
}
group.finish();
+
+ let mut group = c.benchmark_group("point_to_offset");
+ for size in sizes.iter() {
+ group.throughput(Throughput::Bytes(*size as u64));
+ group.bench_with_input(BenchmarkId::from_parameter(size), &size, |b, &size| {
+ let rope = generate_random_rope(rng.clone(), *size);
+
+ b.iter_batched(
+ || generate_random_rope_points(rng.clone(), &rope),
+ |offsets| {
+ for offset in offsets.iter() {
+ black_box(rope.point_to_offset(*offset));
+ }
+ },
+ BatchSize::SmallInput,
+ );
+ });
+ }
+ group.finish();
}
criterion_group!(benches, rope_benchmarks);
@@ -0,0 +1,878 @@
+use crate::{OffsetUtf16, Point, PointUtf16, TextSummary, Unclipped};
+use arrayvec::ArrayString;
+use std::{cmp, ops::Range};
+use sum_tree::Bias;
+use unicode_segmentation::GraphemeCursor;
+use util::debug_panic;
+
/// Minimum number of bytes a non-final chunk should hold. Kept tiny under
/// `cfg(test)` so tree invariants are exercised with many small chunks.
pub(crate) const MIN_BASE: usize = if cfg!(test) { 6 } else { 64 };
/// Maximum number of bytes a chunk can hold. In release builds this is 128,
/// matching the width of the `u128` bitmaps below (one bit per text byte).
pub(crate) const MAX_BASE: usize = MIN_BASE * 2;

/// A fixed-capacity fragment of rope text together with three per-byte
/// bitmaps that index it:
///
/// - `chars`: bit `i` is set iff byte `i` starts a Unicode scalar value.
/// - `chars_utf16`: one set bit per UTF-16 code unit. For a char starting at
///   byte `i`, bit `i` is set, and bit `i + 1` is additionally set when the
///   char needs two UTF-16 code units — so a popcount over a byte range
///   yields its UTF-16 length.
/// - `newlines`: bit `i` is set iff byte `i` is `'\n'`.
#[derive(Clone, Debug, Default)]
pub struct Chunk {
    chars: u128,
    chars_utf16: u128,
    newlines: u128,
    pub text: ArrayString<MAX_BASE>,
}

impl Chunk {
    /// Creates a chunk containing `text`, building the bitmaps as it goes.
    /// `text` must fit in `MAX_BASE` bytes (enforced by the `ArrayString`).
    #[inline(always)]
    pub fn new(text: &str) -> Self {
        let mut this = Chunk::default();
        this.push_str(text);
        this
    }

    /// Appends `text`, extending the bitmaps for every new character.
    #[inline(always)]
    pub fn push_str(&mut self, text: &str) {
        for (char_ix, c) in text.char_indices() {
            // Bit position of this char's first byte within the chunk.
            let ix = self.text.len() + char_ix;
            self.chars |= 1 << ix;
            // Mark one UTF-16 unit at `ix`; `len_utf16()` is 1 or 2, so the
            // second OR re-sets bit `ix` for narrow chars and sets bit
            // `ix + 1` for chars that need a surrogate pair.
            self.chars_utf16 |= 1 << ix;
            self.chars_utf16 |= (c.len_utf16() as u128) << ix;
            self.newlines |= ((c == '\n') as u128) << ix;
        }
        self.text.push_str(text);
    }

    /// Appends an already-indexed slice, reusing its precomputed bitmaps by
    /// shifting them up to this chunk's current length.
    #[inline(always)]
    pub fn append(&mut self, slice: ChunkSlice) {
        if slice.is_empty() {
            return;
        };

        let base_ix = self.text.len();
        self.chars |= slice.chars << base_ix;
        self.chars_utf16 |= slice.chars_utf16 << base_ix;
        self.newlines |= slice.newlines << base_ix;
        self.text.push_str(&slice.text);
    }

    /// Borrows the whole chunk as a `ChunkSlice`.
    #[inline(always)]
    pub fn as_slice(&self) -> ChunkSlice {
        ChunkSlice {
            chars: self.chars,
            chars_utf16: self.chars_utf16,
            newlines: self.newlines,
            text: &self.text,
        }
    }

    /// Borrows the byte sub-range `range` (must lie on char boundaries).
    #[inline(always)]
    pub fn slice(&self, range: Range<usize>) -> ChunkSlice {
        self.as_slice().slice(range)
    }
}
+
+#[derive(Clone, Copy, Debug)]
+pub struct ChunkSlice<'a> {
+ chars: u128,
+ chars_utf16: u128,
+ newlines: u128,
+ text: &'a str,
+}
+
+impl<'a> Into<Chunk> for ChunkSlice<'a> {
+ fn into(self) -> Chunk {
+ Chunk {
+ chars: self.chars,
+ chars_utf16: self.chars_utf16,
+ newlines: self.newlines,
+ text: self.text.try_into().unwrap(),
+ }
+ }
+}
+
impl<'a> ChunkSlice<'a> {
    /// Returns `true` if the slice contains no text.
    #[inline(always)]
    pub fn is_empty(self) -> bool {
        self.text.is_empty()
    }

    /// Returns `true` if `offset` falls on a UTF-8 character boundary.
    #[inline(always)]
    pub fn is_char_boundary(self, offset: usize) -> bool {
        self.text.is_char_boundary(offset)
    }

    /// Splits the slice into `[0, mid)` and `[mid, len)` halves, splitting
    /// the bitmaps along with the text. `mid` must lie on a char boundary
    /// (enforced by `str::split_at`, which panics otherwise).
    #[inline(always)]
    pub fn split_at(self, mid: usize) -> (ChunkSlice<'a>, ChunkSlice<'a>) {
        if mid == MAX_BASE {
            // Whole (maximum-size) slice goes left; handled separately
            // because shifting a u128 by 128 bits would overflow.
            let left = self;
            let right = ChunkSlice {
                chars: 0,
                chars_utf16: 0,
                newlines: 0,
                text: "",
            };
            (left, right)
        } else {
            // NOTE(review): this inner `mid == MAX_BASE` arm is unreachable —
            // the outer branch already returned in that case. Defensive only.
            let mask = if mid == MAX_BASE {
                u128::MAX
            } else {
                (1u128 << mid) - 1
            };
            let (left_text, right_text) = self.text.split_at(mid);
            let left = ChunkSlice {
                chars: self.chars & mask,
                chars_utf16: self.chars_utf16 & mask,
                newlines: self.newlines & mask,
                text: left_text,
            };
            let right = ChunkSlice {
                chars: self.chars >> mid,
                chars_utf16: self.chars_utf16 >> mid,
                newlines: self.newlines >> mid,
                text: right_text,
            };
            (left, right)
        }
    }

    /// Returns the sub-slice for `range` (byte offsets on char boundaries),
    /// masking off bits at or above `range.end` and shifting so bit 0
    /// corresponds to `range.start`.
    #[inline(always)]
    pub fn slice(self, range: Range<usize>) -> Self {
        let mask = if range.end == MAX_BASE {
            // Avoid the overflowing `1u128 << 128`.
            u128::MAX
        } else {
            (1u128 << range.end) - 1
        };
        if range.start == MAX_BASE {
            // Range can only be empty here; also avoids a 128-bit shift.
            Self {
                chars: 0,
                chars_utf16: 0,
                newlines: 0,
                text: "",
            }
        } else {
            Self {
                chars: (self.chars & mask) >> range.start,
                chars_utf16: (self.chars_utf16 & mask) >> range.start,
                newlines: (self.newlines & mask) >> range.start,
                text: &self.text[range],
            }
        }
    }

    /// Computes the aggregate `TextSummary` for this slice from the bitmaps.
    #[inline(always)]
    pub fn text_summary(&self) -> TextSummary {
        let (longest_row, longest_row_chars) = self.longest_row();
        TextSummary {
            len: self.len(),
            len_utf16: self.len_utf16(),
            lines: self.lines(),
            first_line_chars: self.first_line_chars(),
            last_line_chars: self.last_line_chars(),
            last_line_len_utf16: self.last_line_len_utf16(),
            longest_row,
            longest_row_chars,
        }
    }

    /// Get length in bytes
    #[inline(always)]
    pub fn len(&self) -> usize {
        self.text.len()
    }

    /// Get length in UTF-16 code units
    ///
    /// `chars_utf16` carries exactly one set bit per UTF-16 code unit, so a
    /// popcount is the UTF-16 length.
    #[inline(always)]
    pub fn len_utf16(&self) -> OffsetUtf16 {
        OffsetUtf16(self.chars_utf16.count_ones() as usize)
    }

    /// Get point representing number of lines and length of last line
    ///
    /// The column is in bytes: `leading_zeros` locates the highest newline
    /// bit, and everything above it within the `text.len()` used bits is the
    /// final line. With no newlines this degenerates to `(0, len)`.
    #[inline(always)]
    pub fn lines(&self) -> Point {
        let row = self.newlines.count_ones();
        let column = self.newlines.leading_zeros() - (u128::BITS - self.text.len() as u32);
        Point::new(row, column)
    }

    /// Get number of chars in first line
    #[inline(always)]
    pub fn first_line_chars(&self) -> u32 {
        if self.newlines == 0 {
            self.chars.count_ones()
        } else {
            // Count char-start bits strictly below the first newline.
            let mask = (1u128 << self.newlines.trailing_zeros()) - 1;
            (self.chars & mask).count_ones()
        }
    }

    /// Get number of chars in last line
    #[inline(always)]
    pub fn last_line_chars(&self) -> u32 {
        if self.newlines == 0 {
            self.chars.count_ones()
        } else {
            // Mask selecting all bits strictly above the last newline.
            let mask = !(u128::MAX >> self.newlines.leading_zeros());
            (self.chars & mask).count_ones()
        }
    }

    /// Get number of UTF-16 code units in last line
    #[inline(always)]
    pub fn last_line_len_utf16(&self) -> u32 {
        if self.newlines == 0 {
            self.chars_utf16.count_ones()
        } else {
            // Same mask as `last_line_chars`, applied to the UTF-16 bitmap.
            let mask = !(u128::MAX >> self.newlines.leading_zeros());
            (self.chars_utf16 & mask).count_ones()
        }
    }

    /// Get the longest row in the chunk and its length in characters.
    ///
    /// Scans newline by newline, popcounting char-start bits per row. The
    /// `u8` accumulators suffice because a chunk holds at most `MAX_BASE`
    /// (≤ 128) chars.
    #[inline(always)]
    pub fn longest_row(&self) -> (u32, u32) {
        let mut chars = self.chars;
        let mut newlines = self.newlines;
        let mut row = 0;
        let mut longest_row = 0;
        let mut longest_row_chars = 0;
        while newlines > 0 {
            let newline_ix = newlines.trailing_zeros();
            let row_chars = (chars & ((1 << newline_ix) - 1)).count_ones() as u8;
            if row_chars > longest_row_chars {
                longest_row = row;
                longest_row_chars = row_chars;
            }

            // Shift past the newline in two steps: a single shift by
            // `newline_ix + 1` could be 128 (a newline at bit 127), which
            // would overflow a u128 shift.
            newlines >>= newline_ix;
            newlines >>= 1;
            chars >>= newline_ix;
            chars >>= 1;
            row += 1;
        }

        // Account for the trailing row after the last newline.
        let row_chars = chars.count_ones() as u8;
        if row_chars > longest_row_chars {
            (row, row_chars as u32)
        } else {
            (longest_row, longest_row_chars as u32)
        }
    }

    /// Converts a byte offset within the slice to a `Point` (row, byte
    /// column).
    #[inline(always)]
    pub fn offset_to_point(&self, offset: usize) -> Point {
        // Guard against `1u128 << 128` when the offset spans the whole chunk.
        let mask = if offset == MAX_BASE {
            u128::MAX
        } else {
            (1u128 << offset) - 1
        };
        let row = (self.newlines & mask).count_ones();
        // Byte index just past the last newline before `offset` (0 if none).
        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
        let column = (offset - newline_ix as usize) as u32;
        Point::new(row, column)
    }

    /// Converts a `Point` to a byte offset. Out-of-range points
    /// `debug_panic!` in debug builds and clamp (to the chunk/row end) in
    /// release builds.
    #[inline(always)]
    pub fn point_to_offset(&self, point: Point) -> usize {
        if point.row > self.lines().row {
            debug_panic!(
                "point {:?} extends beyond rows for string {:?}",
                point,
                self.text
            );
            return self.len();
        }

        let row_offset_range = self.offset_range_for_row(point.row);
        if point.column > row_offset_range.len() as u32 {
            debug_panic!(
                "point {:?} extends beyond row for string {:?}",
                point,
                self.text
            );
            row_offset_range.end
        } else {
            row_offset_range.start + point.column as usize
        }
    }

    /// Converts a byte offset to a UTF-16 offset by popcounting the UTF-16
    /// bitmap below `offset`.
    #[inline(always)]
    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
        let mask = if offset == MAX_BASE {
            u128::MAX
        } else {
            (1u128 << offset) - 1
        };
        OffsetUtf16((self.chars_utf16 & mask).count_ones() as usize)
    }

    /// Converts a UTF-16 offset to a byte offset, landing on the boundary
    /// after the char containing the target code unit.
    #[inline(always)]
    pub fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
        if target.0 == 0 {
            0
        } else {
            // Byte just past the marker bit of the target-th UTF-16 unit
            // (`nth_set_bit` takes a 1-based rank).
            let ix = nth_set_bit(self.chars_utf16, target.0) + 1;
            if ix == MAX_BASE {
                MAX_BASE
            } else {
                // Skip the remaining bytes of the same char (their bits are
                // zero in the UTF-16 bitmap), without running past the text.
                let utf8_additional_len = cmp::min(
                    (self.chars_utf16 >> ix).trailing_zeros() as usize,
                    self.text.len() - ix,
                );
                ix + utf8_additional_len
            }
        }
    }

    /// Converts a byte offset to a `PointUtf16` (row, UTF-16 column).
    #[inline(always)]
    pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
        let mask = if offset == MAX_BASE {
            u128::MAX
        } else {
            (1u128 << offset) - 1
        };
        let row = (self.newlines & mask).count_ones();
        let newline_ix = u128::BITS - (self.newlines & mask).leading_zeros();
        let column = if newline_ix as usize == MAX_BASE {
            // Offset sits right after a newline at the very end of a full
            // chunk; the column is 0 and shifting by 128 would overflow.
            0
        } else {
            ((self.chars_utf16 & mask) >> newline_ix).count_ones()
        };
        PointUtf16::new(row, column)
    }

    /// Converts a byte-based `Point` to the equivalent `PointUtf16`.
    #[inline(always)]
    pub fn point_to_point_utf16(&self, point: Point) -> PointUtf16 {
        self.offset_to_point_utf16(self.point_to_offset(point))
    }

    /// Converts a `PointUtf16` to a byte offset. With `clip == false`,
    /// out-of-range or mid-character points `debug_panic!`; either way the
    /// result is clamped back to a valid char boundary.
    #[inline(always)]
    pub fn point_utf16_to_offset(&self, point: PointUtf16, clip: bool) -> usize {
        let lines = self.lines();
        if point.row > lines.row {
            if !clip {
                debug_panic!(
                    "point {:?} is beyond this chunk's extent {:?}",
                    point,
                    self.text
                );
            }
            return self.len();
        }

        let row_offset_range = self.offset_range_for_row(point.row);
        let line = self.slice(row_offset_range.clone());
        if point.column > line.last_line_len_utf16() {
            if !clip {
                debug_panic!(
                    "point {:?} is beyond the end of the line in chunk {:?}",
                    point,
                    self.text
                );
            }
            return line.len();
        }

        let mut offset = row_offset_range.start;
        if point.column > 0 {
            offset += line.offset_utf16_to_offset(OffsetUtf16(point.column as usize));
            if !self.text.is_char_boundary(offset) {
                // Landed inside a multi-byte char (e.g. a lone surrogate
                // column); scan back to the char's start.
                offset -= 1;
                while !self.text.is_char_boundary(offset) {
                    offset -= 1;
                }
                if !clip {
                    debug_panic!(
                        "point {:?} is within character in chunk {:?}",
                        point,
                        self.text,
                    );
                }
            }
        }
        offset
    }

    /// Converts a possibly-invalid `PointUtf16` to a valid byte-based
    /// `Point`, clamping rows/columns and snapping back to char boundaries.
    #[inline(always)]
    pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
        let max_point = self.lines();
        if point.0.row > max_point.row {
            return max_point;
        }

        let row_offset_range = self.offset_range_for_row(point.0.row);
        let line = self.slice(row_offset_range.clone());
        if point.0.column == 0 {
            Point::new(point.0.row, 0)
        } else if point.0.column >= line.len_utf16().0 as u32 {
            Point::new(point.0.row, line.len() as u32)
        } else {
            let mut column = line.offset_utf16_to_offset(OffsetUtf16(point.0.column as usize));
            while !line.text.is_char_boundary(column) {
                column -= 1;
            }
            Point::new(point.0.row, column as u32)
        }
    }

    /// Clamps `point` to a valid position, moving to the nearest grapheme
    /// boundary in the direction given by `bias`.
    #[inline(always)]
    pub fn clip_point(&self, point: Point, bias: Bias) -> Point {
        let max_point = self.lines();
        if point.row > max_point.row {
            return max_point;
        }

        let line = self.slice(self.offset_range_for_row(point.row));
        if point.column == 0 {
            point
        } else if point.column >= line.len() as u32 {
            Point::new(point.row, line.len() as u32)
        } else {
            let mut column = point.column as usize;
            let bytes = line.text.as_bytes();
            // Fast path: both neighboring bytes are ASCII, so `column` is
            // already a grapheme boundary.
            if bytes[column - 1] < 128 && bytes[column] < 128 {
                return Point::new(point.row, column as u32);
            }

            // Slow path: walk in the bias direction until we hit both a char
            // boundary and a grapheme-cluster boundary.
            let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
            loop {
                if line.is_char_boundary(column)
                    && grapheme_cursor.is_boundary(line.text, 0).unwrap_or(false)
                {
                    break;
                }

                match bias {
                    Bias::Left => column -= 1,
                    Bias::Right => column += 1,
                }
                grapheme_cursor.set_cursor(column);
            }
            Point::new(point.row, column as u32)
        }
    }

    /// Clamps a possibly-invalid `PointUtf16` to a valid one, biasing the
    /// column like `clip_offset_utf16`.
    #[inline(always)]
    pub fn clip_point_utf16(&self, point: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
        let max_point = self.lines();
        if point.0.row > max_point.row {
            PointUtf16::new(max_point.row, self.last_line_len_utf16())
        } else {
            let line = self.slice(self.offset_range_for_row(point.0.row));
            let column = line.clip_offset_utf16(OffsetUtf16(point.0.column as usize), bias);
            PointUtf16::new(point.0.row, column.0 as u32)
        }
    }

    /// Clamps a UTF-16 offset onto a char boundary, moving in the `bias`
    /// direction when it falls inside a surrogate pair.
    #[inline(always)]
    pub fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
        if target == OffsetUtf16::default() {
            OffsetUtf16::default()
        } else if target >= self.len_utf16() {
            self.len_utf16()
        } else {
            // Round-trip through byte offsets, nudging to a char boundary.
            let mut offset = self.offset_utf16_to_offset(target);
            while !self.text.is_char_boundary(offset) {
                if bias == Bias::Left {
                    offset -= 1;
                } else {
                    offset += 1;
                }
            }
            self.offset_to_offset_utf16(offset)
        }
    }

    /// Byte range occupied by `row` within this slice (newline excluded).
    #[inline(always)]
    fn offset_range_for_row(&self, row: u32) -> Range<usize> {
        // Row N (N > 0) starts one byte past the N-th newline (1-based rank).
        let row_start = if row > 0 {
            nth_set_bit(self.newlines, row as usize) + 1
        } else {
            0
        };
        let row_len = if row_start == MAX_BASE {
            0
        } else {
            // Distance to the next newline, capped at the end of the text.
            cmp::min(
                (self.newlines >> row_start).trailing_zeros(),
                (self.text.len() - row_start) as u32,
            )
        };
        row_start..row_start + row_len as usize
    }
}
+
/// Finds the n-th bit that is set to 1.
///
/// `n` is 1-based: `nth_set_bit(v, 1)` returns the index of the lowest set
/// bit. The caller must ensure `v` has at least `n` set bits; the result is
/// the zero-based bit position.
#[inline(always)]
fn nth_set_bit(v: u128, n: usize) -> usize {
    // Work in u64 halves, since the branchless select below is 64-bit.
    let low = v as u64;
    let high = (v >> 64) as u64;

    let low_count = low.count_ones() as usize;
    if n > low_count {
        // Target is in the high half: adjust the rank and rebase by 64.
        64 + nth_set_bit_u64(high, (n - low_count) as u64) as usize
    } else {
        nth_set_bit_u64(low, n as u64) as usize
    }
}

/// Branchless select of the position of the `n`-th (1-based) set bit of `v`.
///
/// This is the classic SWAR rank/select routine ("Select the bit position
/// with the given count" from Bit Twiddling Hacks). The bits are reversed up
/// front so the published MSB-first algorithm selects from the LSB side; the
/// final `65 - s - 1` maps the result back to an LSB-based index.
#[inline(always)]
fn nth_set_bit_u64(v: u64, mut n: u64) -> u64 {
    let v = v.reverse_bits();
    let mut s: u64 = 64;

    // Parallel bit count intermediates: `a` holds popcounts per 2-bit group,
    // `b` per 4-bit, `c` per 8-bit, `d` per 16-bit group.
    let a = v - ((v >> 1) & (u64::MAX / 3));
    let b = (a & (u64::MAX / 5)) + ((a >> 2) & (u64::MAX / 5));
    let c = (b + (b >> 4)) & (u64::MAX / 0x11);
    let d = (c + (c >> 8)) & (u64::MAX / 0x101);

    // Branchless select: binary-search the prefix popcounts from the top,
    // narrowing `s` by 32, 16, 8, 4, 2, then 1 bits. Each step compares the
    // running rank `n` against the group count `t`; the `& 256` / shift
    // trick turns the borrow of `t - n` into a conditional adjustment of
    // `s` and `n` without branching.
    let t = (d >> 32) + (d >> 48);
    s -= (t.wrapping_sub(n) & 256) >> 3;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (d >> (s - 16)) & 0xff;
    s -= (t.wrapping_sub(n) & 256) >> 4;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (c >> (s - 8)) & 0xf;
    s -= (t.wrapping_sub(n) & 256) >> 5;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (b >> (s - 4)) & 0x7;
    s -= (t.wrapping_sub(n) & 256) >> 6;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (a >> (s - 2)) & 0x3;
    s -= (t.wrapping_sub(n) & 256) >> 7;
    n -= t & (t.wrapping_sub(n) >> 8);

    let t = (v >> (s - 1)) & 0x1;
    s -= (t.wrapping_sub(n) & 256) >> 8;

    // Convert from the reversed, 1-based position back to an LSB-based index.
    65 - s - 1
}
+
#[cfg(test)]
mod tests {
    use super::*;
    use rand::prelude::*;
    use util::RandomCharIter;

    /// Builds a random chunk (truncated back to a char boundary), then
    /// verifies it and ten random char-aligned sub-slices of it against a
    /// plain-`str` reference implementation.
    #[gpui::test(iterations = 100)]
    fn test_random_chunks(mut rng: StdRng) {
        let chunk_len = rng.gen_range(0..=MAX_BASE);
        let text = RandomCharIter::new(&mut rng)
            .take(chunk_len)
            .collect::<String>();
        // `chunk_len` counts chars, not bytes, so trim the string back down
        // to a valid boundary within the byte budget.
        let mut ix = chunk_len;
        while !text.is_char_boundary(ix) {
            ix -= 1;
        }
        let text = &text[..ix];

        log::info!("Chunk: {:?}", text);
        let chunk = Chunk::new(&text);
        verify_chunk(chunk.as_slice(), text);

        for _ in 0..10 {
            // Pick a random range, rounding both ends down to char
            // boundaries (end stays >= start since both round down).
            let mut start = rng.gen_range(0..=chunk.text.len());
            let mut end = rng.gen_range(start..=chunk.text.len());
            while !chunk.text.is_char_boundary(start) {
                start -= 1;
            }
            while !chunk.text.is_char_boundary(end) {
                end -= 1;
            }
            let range = start..end;
            log::info!("Range: {:?}", range);
            let text_slice = &text[range.clone()];
            let chunk_slice = chunk.slice(range);
            verify_chunk(chunk_slice, text_slice);
        }
    }

    /// Checks `nth_set_bit` against a randomly chosen set of bit positions;
    /// note the rank argument is 1-based (`ix += 1`).
    #[gpui::test(iterations = 1000)]
    fn test_nth_set_bit_random(mut rng: StdRng) {
        let set_count = rng.gen_range(0..=128);
        let mut set_bits = (0..128).choose_multiple(&mut rng, set_count);
        set_bits.sort();
        let mut n = 0;
        for ix in set_bits.iter().copied() {
            n |= 1 << ix;
        }

        for (mut ix, position) in set_bits.into_iter().enumerate() {
            ix += 1;
            assert_eq!(
                nth_set_bit(n, ix),
                position,
                "nth_set_bit({:0128b}, {})",
                n,
                ix
            );
        }
    }

    /// Cross-checks every conversion/clipping method on `chunk` against
    /// values computed directly from `text` with plain `str`/`char` APIs.
    fn verify_chunk(chunk: ChunkSlice<'_>, text: &str) {
        let mut offset = 0;
        let mut offset_utf16 = OffsetUtf16(0);
        let mut point = Point::zero();
        let mut point_utf16 = PointUtf16::zero();

        log::info!("Verifying chunk {:?}", text);
        assert_eq!(chunk.offset_to_point(0), Point::zero());

        // Walk the text char by char, checking every coordinate conversion
        // at each char boundary.
        for c in text.chars() {
            let expected_point = chunk.offset_to_point(offset);
            assert_eq!(point, expected_point, "mismatch at offset {}", offset);
            assert_eq!(
                chunk.point_to_offset(point),
                offset,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.offset_to_offset_utf16(offset),
                offset_utf16,
                "mismatch at offset {}",
                offset
            );
            assert_eq!(
                chunk.offset_utf16_to_offset(offset_utf16),
                offset,
                "mismatch at offset_utf16 {:?}",
                offset_utf16
            );
            assert_eq!(
                chunk.point_to_point_utf16(point),
                point_utf16,
                "mismatch at point {:?}",
                point
            );
            assert_eq!(
                chunk.point_utf16_to_offset(point_utf16, false),
                offset,
                "mismatch at point_utf16 {:?}",
                point_utf16
            );
            assert_eq!(
                chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
                point,
                "mismatch for unclipped_point_utf16_to_point at {:?}",
                point_utf16
            );

            // A valid boundary must clip to itself in both directions.
            assert_eq!(
                chunk.clip_point(point, Bias::Left),
                point,
                "incorrect left clip at {:?}",
                point
            );
            assert_eq!(
                chunk.clip_point(point, Bias::Right),
                point,
                "incorrect right clip at {:?}",
                point
            );

            // Positions strictly inside a multi-byte char must clip back
            // (Bias::Left) or forward (Bias::Right) to the char's edges.
            for i in 1..c.len_utf8() {
                let test_point = Point::new(point.row, point.column + i as u32);
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Left),
                    point,
                    "incorrect left clip within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point(test_point, Bias::Right),
                    Point::new(point.row, point.column + c.len_utf8() as u32),
                    "incorrect right clip within multi-byte char at {:?}",
                    test_point
                );
            }

            // Same idea for UTF-16: columns inside a surrogate pair.
            for i in 1..c.len_utf16() {
                let test_point = Unclipped(PointUtf16::new(
                    point_utf16.row,
                    point_utf16.column + i as u32,
                ));
                assert_eq!(
                    chunk.unclipped_point_utf16_to_point(test_point),
                    point,
                    "incorrect unclipped_point_utf16_to_point within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Left),
                    point_utf16,
                    "incorrect left clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );
                assert_eq!(
                    chunk.clip_point_utf16(test_point, Bias::Right),
                    PointUtf16::new(point_utf16.row, point_utf16.column + c.len_utf16() as u32),
                    "incorrect right clip_point_utf16 within multi-byte char at {:?}",
                    test_point
                );

                let test_offset = OffsetUtf16(offset_utf16.0 + i);
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Left),
                    offset_utf16,
                    "incorrect left clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
                assert_eq!(
                    chunk.clip_offset_utf16(test_offset, Bias::Right),
                    OffsetUtf16(offset_utf16.0 + c.len_utf16()),
                    "incorrect right clip_offset_utf16 within multi-byte char at {:?}",
                    test_offset
                );
            }

            // Advance the reference coordinates past this char.
            if c == '\n' {
                point.row += 1;
                point.column = 0;
                point_utf16.row += 1;
                point_utf16.column = 0;
            } else {
                point.column += c.len_utf8() as u32;
                point_utf16.column += c.len_utf16() as u32;
            }

            offset += c.len_utf8();
            offset_utf16.0 += c.len_utf16();
        }

        // Re-check every conversion at the final (one-past-the-end) position.
        let final_point = chunk.offset_to_point(offset);
        assert_eq!(point, final_point, "mismatch at final offset {}", offset);
        assert_eq!(
            chunk.point_to_offset(point),
            offset,
            "mismatch at point {:?}",
            point
        );
        assert_eq!(
            chunk.offset_to_offset_utf16(offset),
            offset_utf16,
            "mismatch at offset {}",
            offset
        );
        assert_eq!(
            chunk.offset_utf16_to_offset(offset_utf16),
            offset,
            "mismatch at offset_utf16 {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.point_to_point_utf16(point),
            point_utf16,
            "mismatch at final point {:?}",
            point
        );
        assert_eq!(
            chunk.point_utf16_to_offset(point_utf16, false),
            offset,
            "mismatch at final point_utf16 {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.unclipped_point_utf16_to_point(Unclipped(point_utf16)),
            point,
            "mismatch for unclipped_point_utf16_to_point at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Left),
            point,
            "incorrect left clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point(point, Bias::Right),
            point,
            "incorrect right clip at final point {:?}",
            point
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Left),
            point_utf16,
            "incorrect left clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_point_utf16(Unclipped(point_utf16), Bias::Right),
            point_utf16,
            "incorrect right clip_point_utf16 at final point {:?}",
            point_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Left),
            offset_utf16,
            "incorrect left clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );
        assert_eq!(
            chunk.clip_offset_utf16(offset_utf16, Bias::Right),
            offset_utf16,
            "incorrect right clip_offset_utf16 at final offset {:?}",
            offset_utf16
        );

        // Verify length methods
        assert_eq!(chunk.len(), text.len());
        assert_eq!(
            chunk.len_utf16().0,
            text.chars().map(|c| c.len_utf16()).sum::<usize>()
        );

        // Verify line counting
        let lines = chunk.lines();
        let mut newline_count = 0;
        let mut last_line_len = 0;
        for c in text.chars() {
            if c == '\n' {
                newline_count += 1;
                last_line_len = 0;
            } else {
                last_line_len += c.len_utf8() as u32;
            }
        }
        assert_eq!(lines, Point::new(newline_count, last_line_len));

        // Verify first/last line chars
        if !text.is_empty() {
            let first_line = text.split('\n').next().unwrap();
            assert_eq!(chunk.first_line_chars(), first_line.chars().count() as u32);

            let last_line = text.split('\n').last().unwrap();
            assert_eq!(chunk.last_line_chars(), last_line.chars().count() as u32);
            assert_eq!(
                chunk.last_line_len_utf16(),
                last_line.chars().map(|c| c.len_utf16() as u32).sum::<u32>()
            );
        }

        // Verify longest row
        let (longest_row, longest_chars) = chunk.longest_row();
        let mut max_chars = 0;
        let mut current_row = 0;
        let mut current_chars = 0;
        let mut max_row = 0;

        for c in text.chars() {
            if c == '\n' {
                if current_chars > max_chars {
                    max_chars = current_chars;
                    max_row = current_row;
                }
                current_row += 1;
                current_chars = 0;
            } else {
                current_chars += 1;
            }
        }

        if current_chars > max_chars {
            max_chars = current_chars;
            max_row = current_row;
        }

        assert_eq!((max_row, max_chars as u32), (longest_row, longest_chars));
    }
}
@@ -1,9 +1,11 @@
+mod chunk;
mod offset_utf16;
mod point;
mod point_utf16;
mod unclipped;
-use arrayvec::ArrayString;
+use chunk::{Chunk, ChunkSlice};
+use rayon::iter::{IntoParallelIterator, ParallelIterator as _};
use smallvec::SmallVec;
use std::{
cmp, fmt, io, mem,
@@ -11,20 +13,12 @@ use std::{
str,
};
use sum_tree::{Bias, Dimension, SumTree};
-use unicode_segmentation::GraphemeCursor;
-use util::debug_panic;
pub use offset_utf16::OffsetUtf16;
pub use point::Point;
pub use point_utf16::PointUtf16;
pub use unclipped::Unclipped;
-#[cfg(test)]
-const CHUNK_BASE: usize = 6;
-
-#[cfg(not(test))]
-const CHUNK_BASE: usize = 64;
-
#[derive(Clone, Default)]
pub struct Rope {
chunks: SumTree<Chunk>,
@@ -36,18 +30,25 @@ impl Rope {
}
pub fn append(&mut self, rope: Rope) {
- let mut chunks = rope.chunks.cursor::<()>(&());
- chunks.next(&());
- if let Some(chunk) = chunks.item() {
- if self.chunks.last().map_or(false, |c| c.0.len() < CHUNK_BASE)
- || chunk.0.len() < CHUNK_BASE
+ if let Some(chunk) = rope.chunks.first() {
+ if self
+ .chunks
+ .last()
+ .map_or(false, |c| c.text.len() < chunk::MIN_BASE)
+ || chunk.text.len() < chunk::MIN_BASE
{
- self.push(&chunk.0);
+ self.push_chunk(chunk.as_slice());
+
+ let mut chunks = rope.chunks.cursor::<()>(&());
+ chunks.next(&());
chunks.next(&());
+ self.chunks.append(chunks.suffix(&()), &());
+ self.check_invariants();
+ return;
}
}
- self.chunks.append(chunks.suffix(&()), &());
+ self.chunks.append(rope.chunks.clone(), &());
self.check_invariants();
}
@@ -77,11 +78,13 @@ impl Rope {
pub fn push(&mut self, mut text: &str) {
self.chunks.update_last(
|last_chunk| {
- let split_ix = if last_chunk.0.len() + text.len() <= 2 * CHUNK_BASE {
+ let split_ix = if last_chunk.text.len() + text.len() <= chunk::MAX_BASE {
text.len()
} else {
- let mut split_ix =
- cmp::min(CHUNK_BASE.saturating_sub(last_chunk.0.len()), text.len());
+ let mut split_ix = cmp::min(
+ chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
+ text.len(),
+ );
while !text.is_char_boundary(split_ix) {
split_ix += 1;
}
@@ -89,7 +92,7 @@ impl Rope {
};
let (suffix, remainder) = text.split_at(split_ix);
- last_chunk.0.push_str(suffix);
+ last_chunk.push_str(suffix);
text = remainder;
},
&(),
@@ -101,12 +104,12 @@ impl Rope {
let mut new_chunks = SmallVec::<[_; 16]>::new();
while !text.is_empty() {
- let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
+ let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
while !text.is_char_boundary(split_ix) {
split_ix -= 1;
}
let (chunk, remainder) = text.split_at(split_ix);
- new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
+ new_chunks.push(chunk);
text = remainder;
}
@@ -116,9 +119,11 @@ impl Rope {
const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
if new_chunks.len() >= PARALLEL_THRESHOLD {
- self.chunks.par_extend(new_chunks.into_vec(), &());
+ self.chunks
+ .par_extend(new_chunks.into_vec().into_par_iter().map(Chunk::new), &());
} else {
- self.chunks.extend(new_chunks, &());
+ self.chunks
+ .extend(new_chunks.into_iter().map(Chunk::new), &());
}
self.check_invariants();
@@ -135,7 +140,7 @@ impl Rope {
// a chunk ends with 3 bytes of a 4-byte character. These 3 bytes end up being stored in the following chunk, thus wasting
// 3 bytes of storage in current chunk.
// For example, a 1024-byte string can occupy between 32 (full ASCII, 1024/32) and 36 (full 4-byte UTF-8, 1024 / 29 rounded up) chunks.
- const MIN_CHUNK_SIZE: usize = 2 * CHUNK_BASE - 3;
+ const MIN_CHUNK_SIZE: usize = chunk::MAX_BASE - 3;
// We also round up the capacity up by one, for a good measure; we *really* don't want to realloc here, as we assume that the # of characters
// we're working with there is large.
@@ -143,12 +148,12 @@ impl Rope {
let mut new_chunks = Vec::with_capacity(capacity);
while !text.is_empty() {
- let mut split_ix = cmp::min(2 * CHUNK_BASE, text.len());
+ let mut split_ix = cmp::min(chunk::MAX_BASE, text.len());
while !text.is_char_boundary(split_ix) {
split_ix -= 1;
}
let (chunk, remainder) = text.split_at(split_ix);
- new_chunks.push(Chunk(ArrayString::from(chunk).unwrap()));
+ new_chunks.push(chunk);
text = remainder;
}
@@ -158,13 +163,44 @@ impl Rope {
const PARALLEL_THRESHOLD: usize = 4 * (2 * sum_tree::TREE_BASE);
if new_chunks.len() >= PARALLEL_THRESHOLD {
- self.chunks.par_extend(new_chunks, &());
+ self.chunks
+ .par_extend(new_chunks.into_par_iter().map(Chunk::new), &());
} else {
- self.chunks.extend(new_chunks, &());
+ self.chunks
+ .extend(new_chunks.into_iter().map(Chunk::new), &());
}
self.check_invariants();
}
+
+ fn push_chunk(&mut self, mut chunk: ChunkSlice) {
+ self.chunks.update_last(
+ |last_chunk| {
+ let split_ix = if last_chunk.text.len() + chunk.len() <= chunk::MAX_BASE {
+ chunk.len()
+ } else {
+ let mut split_ix = cmp::min(
+ chunk::MIN_BASE.saturating_sub(last_chunk.text.len()),
+ chunk.len(),
+ );
+ while !chunk.is_char_boundary(split_ix) {
+ split_ix += 1;
+ }
+ split_ix
+ };
+
+ let (suffix, remainder) = chunk.split_at(split_ix);
+ last_chunk.append(suffix);
+ chunk = remainder;
+ },
+ &(),
+ );
+
+ if !chunk.is_empty() {
+ self.chunks.push(chunk.into(), &());
+ }
+ }
+
pub fn push_front(&mut self, text: &str) {
let suffix = mem::replace(self, Rope::from(text));
self.append(suffix);
@@ -178,7 +214,7 @@ impl Rope {
let mut chunks = self.chunks.cursor::<()>(&()).peekable();
while let Some(chunk) = chunks.next() {
if chunks.peek().is_some() {
- assert!(chunk.0.len() + 3 >= CHUNK_BASE);
+ assert!(chunk.text.len() + 3 >= chunk::MIN_BASE);
}
}
}
@@ -250,7 +286,7 @@ impl Rope {
let overshoot = offset - cursor.start().0;
cursor.start().1
+ cursor.item().map_or(Default::default(), |chunk| {
- chunk.offset_to_offset_utf16(overshoot)
+ chunk.as_slice().offset_to_offset_utf16(overshoot)
})
}
@@ -263,7 +299,7 @@ impl Rope {
let overshoot = offset - cursor.start().0;
cursor.start().1
+ cursor.item().map_or(Default::default(), |chunk| {
- chunk.offset_utf16_to_offset(overshoot)
+ chunk.as_slice().offset_utf16_to_offset(overshoot)
})
}
@@ -275,9 +311,9 @@ impl Rope {
cursor.seek(&offset, Bias::Left, &());
let overshoot = offset - cursor.start().0;
cursor.start().1
- + cursor
- .item()
- .map_or(Point::zero(), |chunk| chunk.offset_to_point(overshoot))
+ + cursor.item().map_or(Point::zero(), |chunk| {
+ chunk.as_slice().offset_to_point(overshoot)
+ })
}
pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
@@ -289,7 +325,7 @@ impl Rope {
let overshoot = offset - cursor.start().0;
cursor.start().1
+ cursor.item().map_or(PointUtf16::zero(), |chunk| {
- chunk.offset_to_point_utf16(overshoot)
+ chunk.as_slice().offset_to_point_utf16(overshoot)
})
}
@@ -302,7 +338,7 @@ impl Rope {
let overshoot = point - cursor.start().0;
cursor.start().1
+ cursor.item().map_or(PointUtf16::zero(), |chunk| {
- chunk.point_to_point_utf16(overshoot)
+ chunk.as_slice().point_to_point_utf16(overshoot)
})
}
@@ -316,7 +352,7 @@ impl Rope {
cursor.start().1
+ cursor
.item()
- .map_or(0, |chunk| chunk.point_to_offset(overshoot))
+ .map_or(0, |chunk| chunk.as_slice().point_to_offset(overshoot))
}
pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
@@ -335,9 +371,9 @@ impl Rope {
cursor.seek(&point, Bias::Left, &());
let overshoot = point - cursor.start().0;
cursor.start().1
- + cursor
- .item()
- .map_or(0, |chunk| chunk.point_utf16_to_offset(overshoot, clip))
+ + cursor.item().map_or(0, |chunk| {
+ chunk.as_slice().point_utf16_to_offset(overshoot, clip)
+ })
}
pub fn unclipped_point_utf16_to_point(&self, point: Unclipped<PointUtf16>) -> Point {
@@ -349,7 +385,7 @@ impl Rope {
let overshoot = Unclipped(point.0 - cursor.start().0);
cursor.start().1
+ cursor.item().map_or(Point::zero(), |chunk| {
- chunk.unclipped_point_utf16_to_point(overshoot)
+ chunk.as_slice().unclipped_point_utf16_to_point(overshoot)
})
}
@@ -358,7 +394,7 @@ impl Rope {
cursor.seek(&offset, Bias::Left, &());
if let Some(chunk) = cursor.item() {
let mut ix = offset - cursor.start();
- while !chunk.0.is_char_boundary(ix) {
+ while !chunk.text.is_char_boundary(ix) {
match bias {
Bias::Left => {
ix -= 1;
@@ -381,7 +417,7 @@ impl Rope {
cursor.seek(&offset, Bias::Right, &());
if let Some(chunk) = cursor.item() {
let overshoot = offset - cursor.start();
- *cursor.start() + chunk.clip_offset_utf16(overshoot, bias)
+ *cursor.start() + chunk.as_slice().clip_offset_utf16(overshoot, bias)
} else {
self.summary().len_utf16
}
@@ -392,7 +428,7 @@ impl Rope {
cursor.seek(&point, Bias::Right, &());
if let Some(chunk) = cursor.item() {
let overshoot = point - cursor.start();
- *cursor.start() + chunk.clip_point(overshoot, bias)
+ *cursor.start() + chunk.as_slice().clip_point(overshoot, bias)
} else {
self.summary().lines
}
@@ -403,7 +439,7 @@ impl Rope {
cursor.seek(&point.0, Bias::Right, &());
if let Some(chunk) = cursor.item() {
let overshoot = Unclipped(point.0 - cursor.start());
- *cursor.start() + chunk.clip_point_utf16(overshoot, bias)
+ *cursor.start() + chunk.as_slice().clip_point_utf16(overshoot, bias)
} else {
self.summary().lines_utf16()
}
@@ -500,7 +536,7 @@ impl<'a> Cursor<'a> {
if let Some(start_chunk) = self.chunks.item() {
let start_ix = self.offset - self.chunks.start();
let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
- slice.push(&start_chunk.0[start_ix..end_ix]);
+ slice.push_chunk(start_chunk.slice(start_ix..end_ix));
}
if end_offset > self.chunks.end(&()) {
@@ -510,7 +546,7 @@ impl<'a> Cursor<'a> {
});
if let Some(end_chunk) = self.chunks.item() {
let end_ix = end_offset - self.chunks.start();
- slice.push(&end_chunk.0[..end_ix]);
+ slice.push_chunk(end_chunk.slice(0..end_ix));
}
}
@@ -525,9 +561,7 @@ impl<'a> Cursor<'a> {
if let Some(start_chunk) = self.chunks.item() {
let start_ix = self.offset - self.chunks.start();
let end_ix = cmp::min(end_offset, self.chunks.end(&())) - self.chunks.start();
- summary.add_assign(&D::from_text_summary(&TextSummary::from(
- &start_chunk.0[start_ix..end_ix],
- )));
+ summary.add_assign(&D::from_chunk(start_chunk.slice(start_ix..end_ix)));
}
if end_offset > self.chunks.end(&()) {
@@ -535,9 +569,7 @@ impl<'a> Cursor<'a> {
summary.add_assign(&self.chunks.summary(&end_offset, Bias::Right, &()));
if let Some(end_chunk) = self.chunks.item() {
let end_ix = end_offset - self.chunks.start();
- summary.add_assign(&D::from_text_summary(&TextSummary::from(
- &end_chunk.0[..end_ix],
- )));
+ summary.add_assign(&D::from_chunk(end_chunk.slice(0..end_ix)));
}
}
@@ -678,11 +710,11 @@ impl<'a> Chunks<'a> {
if let Some(chunk) = self.chunks.item() {
let mut end_ix = self.offset - *self.chunks.start();
- if chunk.0.as_bytes()[end_ix - 1] == b'\n' {
+ if chunk.text.as_bytes()[end_ix - 1] == b'\n' {
end_ix -= 1;
}
- if let Some(newline_ix) = chunk.0[..end_ix].rfind('\n') {
+ if let Some(newline_ix) = chunk.text[..end_ix].rfind('\n') {
self.offset = *self.chunks.start() + newline_ix + 1;
if self.offset_is_valid() {
return true;
@@ -694,7 +726,7 @@ impl<'a> Chunks<'a> {
.search_backward(|summary| summary.text.lines.row > 0, &());
self.offset = *self.chunks.start();
if let Some(chunk) = self.chunks.item() {
- if let Some(newline_ix) = chunk.0.rfind('\n') {
+ if let Some(newline_ix) = chunk.text.rfind('\n') {
self.offset += newline_ix + 1;
if self.offset_is_valid() {
if self.offset == self.chunks.end(&()) {
@@ -731,7 +763,7 @@ impl<'a> Chunks<'a> {
slice_start..slice_end
};
- Some(&chunk.0[slice_range])
+ Some(&chunk.text[slice_range])
}
pub fn lines(self) -> Lines<'a> {
@@ -798,7 +830,7 @@ impl<'a> Bytes<'a> {
}
let start = self.range.start.saturating_sub(chunk_start);
let end = self.range.end - chunk_start;
- Some(&chunk.0.as_bytes()[start..chunk.0.len().min(end)])
+ Some(&chunk.text.as_bytes()[start..chunk.text.len().min(end)])
}
}
@@ -902,265 +934,13 @@ impl<'a> Lines<'a> {
}
}
-#[derive(Clone, Debug, Default)]
-struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
-
-impl Chunk {
- fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
- let mut offset = 0;
- let mut offset_utf16 = OffsetUtf16(0);
- for ch in self.0.chars() {
- if offset >= target {
- break;
- }
-
- offset += ch.len_utf8();
- offset_utf16.0 += ch.len_utf16();
- }
- offset_utf16
- }
-
- fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
- let mut offset_utf16 = OffsetUtf16(0);
- let mut offset = 0;
- for ch in self.0.chars() {
- if offset_utf16 >= target {
- break;
- }
-
- offset += ch.len_utf8();
- offset_utf16.0 += ch.len_utf16();
- }
- offset
- }
-
- fn offset_to_point(&self, target: usize) -> Point {
- let mut offset = 0;
- let mut point = Point::new(0, 0);
- for ch in self.0.chars() {
- if offset >= target {
- break;
- }
-
- if ch == '\n' {
- point.row += 1;
- point.column = 0;
- } else {
- point.column += ch.len_utf8() as u32;
- }
- offset += ch.len_utf8();
- }
- point
- }
-
- fn offset_to_point_utf16(&self, target: usize) -> PointUtf16 {
- let mut offset = 0;
- let mut point = PointUtf16::new(0, 0);
- for ch in self.0.chars() {
- if offset >= target {
- break;
- }
-
- if ch == '\n' {
- point.row += 1;
- point.column = 0;
- } else {
- point.column += ch.len_utf16() as u32;
- }
- offset += ch.len_utf8();
- }
- point
- }
-
- fn point_to_offset(&self, target: Point) -> usize {
- let mut offset = 0;
- let mut point = Point::new(0, 0);
-
- for ch in self.0.chars() {
- if point >= target {
- if point > target {
- debug_panic!("point {target:?} is inside of character {ch:?}");
- }
- break;
- }
-
- if ch == '\n' {
- point.row += 1;
- point.column = 0;
-
- if point.row > target.row {
- debug_panic!(
- "point {target:?} is beyond the end of a line with length {}",
- point.column
- );
- break;
- }
- } else {
- point.column += ch.len_utf8() as u32;
- }
-
- offset += ch.len_utf8();
- }
-
- offset
- }
-
- fn point_to_point_utf16(&self, target: Point) -> PointUtf16 {
- let mut point = Point::zero();
- let mut point_utf16 = PointUtf16::new(0, 0);
- for ch in self.0.chars() {
- if point >= target {
- break;
- }
-
- if ch == '\n' {
- point_utf16.row += 1;
- point_utf16.column = 0;
- point.row += 1;
- point.column = 0;
- } else {
- point_utf16.column += ch.len_utf16() as u32;
- point.column += ch.len_utf8() as u32;
- }
- }
- point_utf16
- }
-
- fn point_utf16_to_offset(&self, target: PointUtf16, clip: bool) -> usize {
- let mut offset = 0;
- let mut point = PointUtf16::new(0, 0);
-
- for ch in self.0.chars() {
- if point == target {
- break;
- }
-
- if ch == '\n' {
- point.row += 1;
- point.column = 0;
-
- if point.row > target.row {
- if !clip {
- debug_panic!(
- "point {target:?} is beyond the end of a line with length {}",
- point.column
- );
- }
- // Return the offset of the newline
- return offset;
- }
- } else {
- point.column += ch.len_utf16() as u32;
- }
-
- if point > target {
- if !clip {
- debug_panic!("point {target:?} is inside of codepoint {ch:?}");
- }
- // Return the offset of the codepoint which we have landed within, bias left
- return offset;
- }
-
- offset += ch.len_utf8();
- }
-
- offset
- }
-
- fn unclipped_point_utf16_to_point(&self, target: Unclipped<PointUtf16>) -> Point {
- let mut point = Point::zero();
- let mut point_utf16 = PointUtf16::zero();
-
- for ch in self.0.chars() {
- if point_utf16 == target.0 {
- break;
- }
-
- if point_utf16 > target.0 {
- // If the point is past the end of a line or inside of a code point,
- // return the last valid point before the target.
- return point;
- }
-
- if ch == '\n' {
- point_utf16 += PointUtf16::new(1, 0);
- point += Point::new(1, 0);
- } else {
- point_utf16 += PointUtf16::new(0, ch.len_utf16() as u32);
- point += Point::new(0, ch.len_utf8() as u32);
- }
- }
-
- point
- }
-
- fn clip_point(&self, target: Point, bias: Bias) -> Point {
- for (row, line) in self.0.split('\n').enumerate() {
- if row == target.row as usize {
- let bytes = line.as_bytes();
- let mut column = target.column.min(bytes.len() as u32) as usize;
- if column == 0
- || column == bytes.len()
- || (bytes[column - 1] < 128 && bytes[column] < 128)
- {
- return Point::new(row as u32, column as u32);
- }
-
- let mut grapheme_cursor = GraphemeCursor::new(column, bytes.len(), true);
- loop {
- if line.is_char_boundary(column)
- && grapheme_cursor.is_boundary(line, 0).unwrap_or(false)
- {
- break;
- }
-
- match bias {
- Bias::Left => column -= 1,
- Bias::Right => column += 1,
- }
- grapheme_cursor.set_cursor(column);
- }
- return Point::new(row as u32, column as u32);
- }
- }
- unreachable!()
- }
-
- fn clip_point_utf16(&self, target: Unclipped<PointUtf16>, bias: Bias) -> PointUtf16 {
- for (row, line) in self.0.split('\n').enumerate() {
- if row == target.0.row as usize {
- let mut code_units = line.encode_utf16();
- let mut column = code_units.by_ref().take(target.0.column as usize).count();
- if char::decode_utf16(code_units).next().transpose().is_err() {
- match bias {
- Bias::Left => column -= 1,
- Bias::Right => column += 1,
- }
- }
- return PointUtf16::new(row as u32, column as u32);
- }
- }
- unreachable!()
- }
-
- fn clip_offset_utf16(&self, target: OffsetUtf16, bias: Bias) -> OffsetUtf16 {
- let mut code_units = self.0.encode_utf16();
- let mut offset = code_units.by_ref().take(target.0).count();
- if char::decode_utf16(code_units).next().transpose().is_err() {
- match bias {
- Bias::Left => offset -= 1,
- Bias::Right => offset += 1,
- }
- }
- OffsetUtf16(offset)
- }
-}
-
impl sum_tree::Item for Chunk {
type Summary = ChunkSummary;
fn summary(&self, _cx: &()) -> Self::Summary {
- ChunkSummary::from(self.0.as_str())
+ ChunkSummary {
+ text: self.as_slice().text_summary(),
+ }
}
}
@@ -1169,14 +949,6 @@ pub struct ChunkSummary {
text: TextSummary,
}
-impl<'a> From<&'a str> for ChunkSummary {
- fn from(text: &'a str) -> Self {
- Self {
- text: TextSummary::from(text),
- }
- }
-}
-
impl sum_tree::Summary for ChunkSummary {
type Context = ();
@@ -1323,6 +1095,7 @@ impl std::ops::AddAssign<Self> for TextSummary {
pub trait TextDimension: 'static + for<'a> Dimension<'a, ChunkSummary> {
fn from_text_summary(summary: &TextSummary) -> Self;
+ fn from_chunk(chunk: ChunkSlice) -> Self;
fn add_assign(&mut self, other: &Self);
}
@@ -1334,6 +1107,10 @@ impl<D1: TextDimension, D2: TextDimension> TextDimension for (D1, D2) {
)
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ (D1::from_chunk(chunk), D2::from_chunk(chunk))
+ }
+
fn add_assign(&mut self, other: &Self) {
self.0.add_assign(&other.0);
self.1.add_assign(&other.1);
@@ -1355,6 +1132,10 @@ impl TextDimension for TextSummary {
summary.clone()
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ chunk.text_summary()
+ }
+
fn add_assign(&mut self, other: &Self) {
*self += other;
}
@@ -1375,6 +1156,10 @@ impl TextDimension for usize {
summary.len
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ chunk.len()
+ }
+
fn add_assign(&mut self, other: &Self) {
*self += other;
}
@@ -1395,6 +1180,10 @@ impl TextDimension for OffsetUtf16 {
summary.len_utf16
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ chunk.len_utf16()
+ }
+
fn add_assign(&mut self, other: &Self) {
*self += other;
}
@@ -1415,6 +1204,10 @@ impl TextDimension for Point {
summary.lines
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ chunk.lines()
+ }
+
fn add_assign(&mut self, other: &Self) {
*self += other;
}
@@ -1435,6 +1228,13 @@ impl TextDimension for PointUtf16 {
summary.lines_utf16()
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ PointUtf16 {
+ row: chunk.lines().row,
+ column: chunk.last_line_len_utf16(),
+ }
+ }
+
fn add_assign(&mut self, other: &Self) {
*self += other;
}
@@ -1919,7 +1719,7 @@ mod tests {
fn text(&self) -> String {
let mut text = String::new();
for chunk in self.chunks.cursor::<()>(&()) {
- text.push_str(&chunk.0);
+ text.push_str(&chunk.text);
}
text
}
@@ -1,4 +1,4 @@
-use crate::{ChunkSummary, TextDimension, TextSummary};
+use crate::{chunk::ChunkSlice, ChunkSummary, TextDimension, TextSummary};
use std::ops::{Add, AddAssign, Sub, SubAssign};
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
@@ -27,6 +27,10 @@ impl<T: TextDimension> TextDimension for Unclipped<T> {
Unclipped(T::from_text_summary(summary))
}
+ fn from_chunk(chunk: ChunkSlice) -> Self {
+ Unclipped(T::from_chunk(chunk))
+ }
+
fn add_assign(&mut self, other: &Self) {
TextDimension::add_assign(&mut self.0, &other.0);
}
@@ -14,7 +14,7 @@ doctest = false
[dependencies]
arrayvec = "0.7.1"
-rayon = "1.8"
+rayon.workspace = true
log.workspace = true
[dev-dependencies]