Start indexing UTF-16 offsets

Antonio Scandurra created

This is needed because Cocoa reports text ranges as UTF-16 indices.

Change summary

crates/editor/src/display_map/fold_map.rs |  28 ++--
crates/editor/src/multi_buffer.rs         | 120 ++++++++++++++++++++----
crates/project/src/fs.rs                  |   2 
crates/text/src/rope.rs                   | 113 +++++++++++++++++++++--
crates/text/src/tests.rs                  |  15 ++-
crates/text/src/text.rs                   |  38 +++++++
6 files changed, 261 insertions(+), 55 deletions(-)

Detailed changes

crates/editor/src/display_map/fold_map.rs 🔗

@@ -63,14 +63,14 @@ impl FoldPoint {
             .cursor::<(FoldPoint, TransformSummary)>();
         cursor.seek(self, Bias::Right, &());
         let overshoot = self.0 - cursor.start().1.output.lines;
-        let mut offset = cursor.start().1.output.bytes;
+        let mut offset = cursor.start().1.output.len;
         if !overshoot.is_zero() {
             let transform = cursor.item().expect("display point out of range");
             assert!(transform.output_text.is_none());
             let end_buffer_offset = snapshot
                 .buffer_snapshot
                 .point_to_offset(cursor.start().1.input.lines + overshoot);
-            offset += end_buffer_offset - cursor.start().1.input.bytes;
+            offset += end_buffer_offset - cursor.start().1.input.len;
         }
         FoldOffset(offset)
     }
@@ -249,7 +249,7 @@ impl FoldMap {
     fn check_invariants(&self) {
         if cfg!(test) {
             assert_eq!(
-                self.transforms.lock().summary().input.bytes,
+                self.transforms.lock().summary().input.len,
                 self.buffer.lock().len(),
                 "transform tree does not match buffer's length"
             );
@@ -341,7 +341,7 @@ impl FoldMap {
                     let mut fold = folds.next().unwrap();
                     let sum = new_transforms.summary();
 
-                    assert!(fold.start >= sum.input.bytes);
+                    assert!(fold.start >= sum.input.len);
 
                     while folds
                         .peek()
@@ -353,9 +353,9 @@ impl FoldMap {
                         }
                     }
 
-                    if fold.start > sum.input.bytes {
+                    if fold.start > sum.input.len {
                         let text_summary = new_buffer
-                            .text_summary_for_range::<TextSummary, _>(sum.input.bytes..fold.start);
+                            .text_summary_for_range::<TextSummary, _>(sum.input.len..fold.start);
                         new_transforms.push(
                             Transform {
                                 summary: TransformSummary {
@@ -384,9 +384,9 @@ impl FoldMap {
                 }
 
                 let sum = new_transforms.summary();
-                if sum.input.bytes < edit.new.end {
+                if sum.input.len < edit.new.end {
                     let text_summary = new_buffer
-                        .text_summary_for_range::<TextSummary, _>(sum.input.bytes..edit.new.end);
+                        .text_summary_for_range::<TextSummary, _>(sum.input.len..edit.new.end);
                     new_transforms.push(
                         Transform {
                             summary: TransformSummary {
@@ -558,7 +558,7 @@ impl FoldSnapshot {
     }
 
     pub fn len(&self) -> FoldOffset {
-        FoldOffset(self.transforms.summary().output.bytes)
+        FoldOffset(self.transforms.summary().output.len)
     }
 
     pub fn line_len(&self, row: u32) -> u32 {
@@ -766,7 +766,7 @@ impl FoldSnapshot {
                 )
             }
         } else {
-            FoldOffset(self.transforms.summary().output.bytes)
+            FoldOffset(self.transforms.summary().output.len)
         }
     }
 
@@ -1050,7 +1050,7 @@ impl<'a> Iterator for FoldChunks<'a> {
         // advance the transform and buffer cursors to the end of the fold.
         if let Some(output_text) = transform.output_text {
             self.buffer_chunk.take();
-            self.buffer_offset += transform.summary.input.bytes;
+            self.buffer_offset += transform.summary.input.len;
             self.buffer_chunks.seek(self.buffer_offset);
 
             while self.buffer_offset >= self.transform_cursor.end(&()).1
@@ -1158,7 +1158,7 @@ impl FoldOffset {
         let overshoot = if cursor.item().map_or(true, |t| t.is_fold()) {
             Point::new(0, (self.0 - cursor.start().0 .0) as u32)
         } else {
-            let buffer_offset = cursor.start().1.input.bytes + self.0 - cursor.start().0 .0;
+            let buffer_offset = cursor.start().1.input.len + self.0 - cursor.start().0 .0;
             let buffer_point = snapshot.buffer_snapshot.offset_to_point(buffer_offset);
             buffer_point - cursor.start().1.input.lines
         };
@@ -1176,7 +1176,7 @@ impl Sub for FoldOffset {
 
 impl<'a> sum_tree::Dimension<'a, TransformSummary> for FoldOffset {
     fn add_summary(&mut self, summary: &'a TransformSummary, _: &()) {
-        self.0 += &summary.output.bytes;
+        self.0 += &summary.output.len;
     }
 }
 
@@ -1188,7 +1188,7 @@ impl<'a> sum_tree::Dimension<'a, TransformSummary> for Point {
 
 impl<'a> sum_tree::Dimension<'a, TransformSummary> for usize {
     fn add_summary(&mut self, summary: &'a TransformSummary, _: &()) {
-        *self += &summary.input.bytes;
+        *self += &summary.input.len;
     }
 }
 

crates/editor/src/multi_buffer.rs 🔗

@@ -9,7 +9,7 @@ pub use language::Completion;
 use language::{
     char_kind, Buffer, BufferChunks, BufferSnapshot, CharKind, Chunk, DiagnosticEntry, Event, File,
     IndentSize, Language, OffsetRangeExt, Outline, OutlineItem, Selection, ToOffset as _,
-    ToPoint as _, ToPointUtf16 as _, TransactionId,
+    ToOffsetUtf16 as _, ToPoint as _, ToPointUtf16 as _, TransactionId,
 };
 use settings::Settings;
 use smallvec::SmallVec;
@@ -29,7 +29,7 @@ use text::{
     locator::Locator,
     rope::TextDimension,
     subscription::{Subscription, Topic},
-    Edit, Point, PointUtf16, TextSummary,
+    Edit, OffsetUtf16, Point, PointUtf16, TextSummary,
 };
 use theme::SyntaxTheme;
 use util::post_inc;
@@ -72,6 +72,10 @@ pub trait ToOffset: 'static + fmt::Debug {
     fn to_offset(&self, snapshot: &MultiBufferSnapshot) -> usize;
 }
 
+pub trait ToOffsetUtf16: 'static + fmt::Debug {
+    fn to_offset_utf16(&self, snapshot: &MultiBufferSnapshot) -> OffsetUtf16;
+}
+
 pub trait ToPoint: 'static + fmt::Debug {
     fn to_point(&self, snapshot: &MultiBufferSnapshot) -> Point;
 }
@@ -809,7 +813,7 @@ impl MultiBuffer {
         let mut cursor = snapshot.excerpts.cursor::<Option<&ExcerptId>>();
         let mut new_excerpts = cursor.slice(&Some(prev_excerpt_id), Bias::Right, &());
 
-        let edit_start = new_excerpts.summary().text.bytes;
+        let edit_start = new_excerpts.summary().text.len;
         new_excerpts.update_last(
             |excerpt| {
                 excerpt.has_trailing_newline = true;
@@ -862,7 +866,7 @@ impl MultiBuffer {
             &(),
         );
 
-        let edit_end = new_excerpts.summary().text.bytes;
+        let edit_end = new_excerpts.summary().text.len;
 
         let suffix = cursor.suffix(&());
         let changed_trailing_excerpt = suffix.is_empty();
@@ -1068,7 +1072,7 @@ impl MultiBuffer {
 
                 // Push an edit for the removal of this run of excerpts.
                 let old_end = cursor.start().1;
-                let new_start = new_excerpts.summary().text.bytes;
+                let new_start = new_excerpts.summary().text.len;
                 edits.push(Edit {
                     old: old_start..old_end,
                     new: new_start..new_start,
@@ -1297,7 +1301,7 @@ impl MultiBuffer {
                         )
                         .map(|mut edit| {
                             let excerpt_old_start = cursor.start().1;
-                            let excerpt_new_start = new_excerpts.summary().text.bytes;
+                            let excerpt_new_start = new_excerpts.summary().text.len;
                             edit.old.start += excerpt_old_start;
                             edit.old.end += excerpt_old_start;
                             edit.new.start += excerpt_new_start;
@@ -1527,7 +1531,7 @@ impl MultiBufferSnapshot {
         let mut cursor = self.excerpts.cursor::<usize>();
         cursor.seek(&offset, Bias::Left, &());
         let mut excerpt_chunks = cursor.item().map(|excerpt| {
-            let end_before_footer = cursor.start() + excerpt.text_summary.bytes;
+            let end_before_footer = cursor.start() + excerpt.text_summary.len;
             let start = excerpt.range.context.start.to_offset(&excerpt.buffer);
             let end = start + (cmp::min(offset, end_before_footer) - cursor.start());
             excerpt.buffer.reversed_chunks_in_range(start..end)
@@ -1629,7 +1633,7 @@ impl MultiBufferSnapshot {
     }
 
     pub fn len(&self) -> usize {
-        self.excerpts.summary().text.bytes
+        self.excerpts.summary().text.len
     }
 
     pub fn max_buffer_row(&self) -> u32 {
@@ -1824,7 +1828,53 @@ impl MultiBufferSnapshot {
                 .point_to_offset(excerpt_start_point + overshoot);
             *start_offset + buffer_offset - excerpt_start_offset
         } else {
-            self.excerpts.summary().text.bytes
+            self.excerpts.summary().text.len
+        }
+    }
+
+    pub fn offset_utf16_to_offset(&self, offset_utf16: OffsetUtf16) -> usize {
+        if let Some((_, _, buffer)) = self.as_singleton() {
+            return buffer.offset_utf16_to_offset(offset_utf16);
+        }
+
+        let mut cursor = self.excerpts.cursor::<(OffsetUtf16, usize)>();
+        cursor.seek(&offset_utf16, Bias::Right, &());
+        if let Some(excerpt) = cursor.item() {
+            let (start_offset_utf16, start_offset) = cursor.start();
+            let overshoot = offset_utf16 - start_offset_utf16;
+            let excerpt_start_offset = excerpt.range.context.start.to_offset(&excerpt.buffer);
+            let excerpt_start_offset_utf16 =
+                excerpt.buffer.offset_to_offset_utf16(excerpt_start_offset);
+            let buffer_offset = excerpt
+                .buffer
+                .offset_utf16_to_offset(excerpt_start_offset_utf16 + overshoot);
+            *start_offset + (buffer_offset - excerpt_start_offset)
+        } else {
+            self.excerpts.summary().text.len
+        }
+    }
+
+    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
+        if let Some((_, _, buffer)) = self.as_singleton() {
+            return buffer.offset_to_offset_utf16(offset);
+        }
+
+        let mut cursor = self.excerpts.cursor::<(usize, OffsetUtf16)>();
+        cursor.seek(&offset, Bias::Right, &());
+        if let Some(excerpt) = cursor.item() {
+            let (start_offset, start_offset_utf16) = cursor.start();
+            let overshoot = offset - start_offset;
+            let excerpt_start_offset_utf16 =
+                excerpt.range.context.start.to_offset_utf16(&excerpt.buffer);
+            let excerpt_start_offset = excerpt
+                .buffer
+                .offset_utf16_to_offset(excerpt_start_offset_utf16);
+            let buffer_offset_utf16 = excerpt
+                .buffer
+                .offset_to_offset_utf16(excerpt_start_offset + overshoot);
+            *start_offset_utf16 + (buffer_offset_utf16 - excerpt_start_offset_utf16)
+        } else {
+            OffsetUtf16(self.excerpts.summary().text.len_utf16)
         }
     }
 
@@ -1847,7 +1897,7 @@ impl MultiBufferSnapshot {
                 .point_utf16_to_offset(excerpt_start_point + overshoot);
             *start_offset + (buffer_offset - excerpt_start_offset)
         } else {
-            self.excerpts.summary().text.bytes
+            self.excerpts.summary().text.len
         }
     }
 
@@ -2311,7 +2361,7 @@ impl MultiBufferSnapshot {
                     .context
                     .start
                     .to_offset(&start_excerpt.buffer);
-                let excerpt_buffer_end = excerpt_buffer_start + start_excerpt.text_summary.bytes;
+                let excerpt_buffer_end = excerpt_buffer_start + start_excerpt.text_summary.len;
 
                 let start_in_buffer =
                     excerpt_buffer_start + range.start.saturating_sub(*cursor.start());
@@ -2415,7 +2465,7 @@ impl MultiBufferSnapshot {
                     .context
                     .start
                     .to_offset(&start_excerpt.buffer);
-                let excerpt_buffer_end = excerpt_buffer_start + start_excerpt.text_summary.bytes;
+                let excerpt_buffer_end = excerpt_buffer_start + start_excerpt.text_summary.len;
 
                 let start_in_buffer =
                     excerpt_buffer_start + range.start.saturating_sub(*cursor.start());
@@ -2717,11 +2767,11 @@ impl Excerpt {
     ) -> ExcerptChunks<'a> {
         let content_start = self.range.context.start.to_offset(&self.buffer);
         let chunks_start = content_start + range.start;
-        let chunks_end = content_start + cmp::min(range.end, self.text_summary.bytes);
+        let chunks_end = content_start + cmp::min(range.end, self.text_summary.len);
 
         let footer_height = if self.has_trailing_newline
-            && range.start <= self.text_summary.bytes
-            && range.end > self.text_summary.bytes
+            && range.start <= self.text_summary.len
+            && range.end > self.text_summary.len
         {
             1
         } else {
@@ -2739,10 +2789,10 @@ impl Excerpt {
     fn bytes_in_range(&self, range: Range<usize>) -> ExcerptBytes {
         let content_start = self.range.context.start.to_offset(&self.buffer);
         let bytes_start = content_start + range.start;
-        let bytes_end = content_start + cmp::min(range.end, self.text_summary.bytes);
+        let bytes_end = content_start + cmp::min(range.end, self.text_summary.len);
         let footer_height = if self.has_trailing_newline
-            && range.start <= self.text_summary.bytes
-            && range.end > self.text_summary.bytes
+            && range.start <= self.text_summary.len
+            && range.end > self.text_summary.len
         {
             1
         } else {
@@ -2836,13 +2886,13 @@ impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for TextSummary {
 
 impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for usize {
     fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) {
-        *self += summary.text.bytes;
+        *self += summary.text.len;
     }
 }
 
 impl<'a> sum_tree::SeekTarget<'a, ExcerptSummary, ExcerptSummary> for usize {
     fn cmp(&self, cursor_location: &ExcerptSummary, _: &()) -> cmp::Ordering {
-        Ord::cmp(self, &cursor_location.text.bytes)
+        Ord::cmp(self, &cursor_location.text.len)
     }
 }
 
@@ -2852,6 +2902,12 @@ impl<'a> sum_tree::SeekTarget<'a, ExcerptSummary, ExcerptSummary> for Option<&'a
     }
 }
 
+impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for OffsetUtf16 {
+    fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) {
+        self.0 += summary.text.len_utf16;
+    }
+}
+
 impl<'a> sum_tree::Dimension<'a, ExcerptSummary> for Point {
     fn add_summary(&mut self, summary: &'a ExcerptSummary, _: &()) {
         *self += summary.text.lines;
@@ -3060,6 +3116,24 @@ impl ToOffset for usize {
     }
 }
 
+impl ToOffset for OffsetUtf16 {
+    fn to_offset<'a>(&self, snapshot: &MultiBufferSnapshot) -> usize {
+        snapshot.offset_utf16_to_offset(*self)
+    }
+}
+
+impl ToOffsetUtf16 for OffsetUtf16 {
+    fn to_offset_utf16(&self, _snapshot: &MultiBufferSnapshot) -> OffsetUtf16 {
+        *self
+    }
+}
+
+impl ToOffsetUtf16 for usize {
+    fn to_offset_utf16(&self, snapshot: &MultiBufferSnapshot) -> OffsetUtf16 {
+        snapshot.offset_to_offset_utf16(*self)
+    }
+}
+
 impl ToPoint for usize {
     fn to_point<'a>(&self, snapshot: &MultiBufferSnapshot) -> Point {
         snapshot.offset_to_point(*self)
@@ -3823,7 +3897,7 @@ mod tests {
                     buffer.text_summary_for_range::<PointUtf16, _>(0..buffer_range.start);
 
                 let excerpt_start = excerpt_starts.next().unwrap();
-                let mut offset = excerpt_start.bytes;
+                let mut offset = excerpt_start.len;
                 let mut buffer_offset = buffer_range.start;
                 let mut point = excerpt_start.lines;
                 let mut buffer_point = buffer_start_point;
@@ -3841,7 +3915,7 @@ mod tests {
                         let buffer_right_offset = buffer.clip_offset(buffer_offset, Bias::Right);
                         assert_eq!(
                             left_offset,
-                            excerpt_start.bytes + (buffer_left_offset - buffer_range.start),
+                            excerpt_start.len + (buffer_left_offset - buffer_range.start),
                             "clip_offset({:?}, Left). buffer: {:?}, buffer offset: {:?}",
                             offset,
                             buffer_id,
@@ -3849,7 +3923,7 @@ mod tests {
                         );
                         assert_eq!(
                             right_offset,
-                            excerpt_start.bytes + (buffer_right_offset - buffer_range.start),
+                            excerpt_start.len + (buffer_right_offset - buffer_range.start),
                             "clip_offset({:?}, Right). buffer: {:?}, buffer offset: {:?}",
                             offset,
                             buffer_id,

crates/project/src/fs.rs 🔗

@@ -162,7 +162,7 @@ impl Fs for RealFs {
     }
 
     async fn save(&self, path: &Path, text: &Rope, line_ending: LineEnding) -> Result<()> {
-        let buffer_size = text.summary().bytes.min(10 * 1024);
+        let buffer_size = text.summary().len.min(10 * 1024);
         let file = smol::fs::File::create(path).await?;
         let mut writer = smol::io::BufWriter::with_capacity(buffer_size, file);
         for chunk in chunks(text, line_ending) {

crates/text/src/rope.rs 🔗

@@ -1,6 +1,5 @@
-use crate::PointUtf16;
-
 use super::Point;
+use crate::{OffsetUtf16, PointUtf16};
 use arrayvec::ArrayString;
 use bromberg_sl2::{DigestString, HashMatrix};
 use smallvec::SmallVec;
@@ -165,8 +164,34 @@ impl Rope {
         Chunks::new(self, range, true)
     }
 
+    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
+        if offset >= self.summary().len {
+            return OffsetUtf16(self.summary().len_utf16);
+        }
+        let mut cursor = self.chunks.cursor::<(usize, OffsetUtf16)>();
+        cursor.seek(&offset, Bias::Left, &());
+        let overshoot = offset - cursor.start().0;
+        cursor.start().1
+            + cursor.item().map_or(Default::default(), |chunk| {
+                chunk.offset_to_offset_utf16(overshoot)
+            })
+    }
+
+    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
+        if offset.0 >= self.summary().len_utf16 {
+            return self.summary().len;
+        }
+        let mut cursor = self.chunks.cursor::<(OffsetUtf16, usize)>();
+        cursor.seek(&offset, Bias::Left, &());
+        let overshoot = offset - cursor.start().0;
+        cursor.start().1
+            + cursor.item().map_or(Default::default(), |chunk| {
+                chunk.offset_utf16_to_offset(overshoot)
+            })
+    }
+
     pub fn offset_to_point(&self, offset: usize) -> Point {
-        if offset >= self.summary().bytes {
+        if offset >= self.summary().len {
             return self.summary().lines;
         }
         let mut cursor = self.chunks.cursor::<(usize, Point)>();
@@ -179,7 +204,7 @@ impl Rope {
     }
 
     pub fn offset_to_point_utf16(&self, offset: usize) -> PointUtf16 {
-        if offset >= self.summary().bytes {
+        if offset >= self.summary().len {
             return self.summary().lines_utf16;
         }
         let mut cursor = self.chunks.cursor::<(usize, PointUtf16)>();
@@ -206,7 +231,7 @@ impl Rope {
 
     pub fn point_to_offset(&self, point: Point) -> usize {
         if point >= self.summary().lines {
-            return self.summary().bytes;
+            return self.summary().len;
         }
         let mut cursor = self.chunks.cursor::<(Point, usize)>();
         cursor.seek(&point, Bias::Left, &());
@@ -219,7 +244,7 @@ impl Rope {
 
     pub fn point_utf16_to_offset(&self, point: PointUtf16) -> usize {
         if point >= self.summary().lines_utf16 {
-            return self.summary().bytes;
+            return self.summary().len;
         }
         let mut cursor = self.chunks.cursor::<(PointUtf16, usize)>();
         cursor.seek(&point, Bias::Left, &());
@@ -262,7 +287,7 @@ impl Rope {
             }
             offset
         } else {
-            self.summary().bytes
+            self.summary().len
         }
     }
 
@@ -543,6 +568,34 @@ impl<'a> io::Read for Bytes<'a> {
 struct Chunk(ArrayString<{ 2 * CHUNK_BASE }>);
 
 impl Chunk {
+    fn offset_to_offset_utf16(&self, target: usize) -> OffsetUtf16 {
+        let mut offset = 0;
+        let mut offset_utf16 = OffsetUtf16(0);
+        for ch in self.0.chars() {
+            if offset >= target {
+                break;
+            }
+
+            offset += ch.len_utf8();
+            offset_utf16.0 += ch.len_utf16();
+        }
+        offset_utf16
+    }
+
+    fn offset_utf16_to_offset(&self, target: OffsetUtf16) -> usize {
+        let mut offset_utf16 = OffsetUtf16(0);
+        let mut offset = 0;
+        for ch in self.0.chars() {
+            if offset_utf16 >= target {
+                break;
+            }
+
+            offset += ch.len_utf8();
+            offset_utf16.0 += ch.len_utf16();
+        }
+        offset
+    }
+
     fn offset_to_point(&self, target: usize) -> Point {
         let mut offset = 0;
         let mut point = Point::new(0, 0);
@@ -748,7 +801,8 @@ impl sum_tree::Summary for ChunkSummary {
 
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct TextSummary {
-    pub bytes: usize,
+    pub len: usize,
+    pub len_utf16: usize,
     pub lines: Point,
     pub lines_utf16: PointUtf16,
     pub first_line_chars: u32,
@@ -759,6 +813,7 @@ pub struct TextSummary {
 
 impl<'a> From<&'a str> for TextSummary {
     fn from(text: &'a str) -> Self {
+        let mut len_utf16 = 0;
         let mut lines = Point::new(0, 0);
         let mut lines_utf16 = PointUtf16::new(0, 0);
         let mut first_line_chars = 0;
@@ -766,6 +821,8 @@ impl<'a> From<&'a str> for TextSummary {
         let mut longest_row = 0;
         let mut longest_row_chars = 0;
         for c in text.chars() {
+            len_utf16 += c.len_utf16();
+
             if c == '\n' {
                 lines += Point::new(1, 0);
                 lines_utf16 += PointUtf16::new(1, 0);
@@ -787,7 +844,8 @@ impl<'a> From<&'a str> for TextSummary {
         }
 
         TextSummary {
-            bytes: text.len(),
+            len: text.len(),
+            len_utf16,
             lines,
             lines_utf16,
             first_line_chars,
@@ -837,7 +895,8 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
             self.last_line_chars = other.last_line_chars;
         }
 
-        self.bytes += other.bytes;
+        self.len += other.len;
+        self.len_utf16 += other.len_utf16;
         self.lines += other.lines;
         self.lines_utf16 += other.lines_utf16;
     }
@@ -886,13 +945,29 @@ impl TextDimension for TextSummary {
 
 impl<'a> sum_tree::Dimension<'a, ChunkSummary> for usize {
     fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
-        *self += summary.text.bytes;
+        *self += summary.text.len;
     }
 }
 
 impl TextDimension for usize {
     fn from_text_summary(summary: &TextSummary) -> Self {
-        summary.bytes
+        summary.len
+    }
+
+    fn add_assign(&mut self, other: &Self) {
+        *self += other;
+    }
+}
+
+impl<'a> sum_tree::Dimension<'a, ChunkSummary> for OffsetUtf16 {
+    fn add_summary(&mut self, summary: &'a ChunkSummary, _: &()) {
+        self.0 += summary.text.len_utf16;
+    }
+}
+
+impl TextDimension for OffsetUtf16 {
+    fn from_text_summary(summary: &TextSummary) -> Self {
+        Self(summary.len_utf16)
     }
 
     fn add_assign(&mut self, other: &Self) {
@@ -1054,6 +1129,7 @@ mod tests {
                 );
             }
 
+            let mut offset_utf16 = OffsetUtf16(0);
             let mut point = Point::new(0, 0);
             let mut point_utf16 = PointUtf16::new(0, 0);
             for (ix, ch) in expected.char_indices().chain(Some((expected.len(), '\0'))) {
@@ -1076,6 +1152,18 @@ mod tests {
                     "point_utf16_to_offset({:?})",
                     point_utf16
                 );
+                assert_eq!(
+                    actual.offset_to_offset_utf16(ix),
+                    offset_utf16,
+                    "offset_to_offset_utf16({:?})",
+                    ix
+                );
+                assert_eq!(
+                    actual.offset_utf16_to_offset(offset_utf16),
+                    ix,
+                    "offset_utf16_to_offset({:?})",
+                    offset_utf16
+                );
                 if ch == '\n' {
                     point += Point::new(1, 0);
                     point_utf16 += PointUtf16::new(1, 0);
@@ -1083,6 +1171,7 @@ mod tests {
                     point.column += ch.len_utf8() as u32;
                     point_utf16.column += ch.len_utf16() as u32;
                 }
+                offset_utf16.0 += ch.len_utf16();
             }
 
             let mut point_utf16 = PointUtf16::zero();

crates/text/src/tests.rs 🔗

@@ -247,7 +247,8 @@ fn test_text_summary_for_range() {
     assert_eq!(
         buffer.text_summary_for_range::<TextSummary, _>(1..3),
         TextSummary {
-            bytes: 2,
+            len: 2,
+            len_utf16: 2,
             lines: Point::new(1, 0),
             lines_utf16: PointUtf16::new(1, 0),
             first_line_chars: 1,
@@ -259,7 +260,8 @@ fn test_text_summary_for_range() {
     assert_eq!(
         buffer.text_summary_for_range::<TextSummary, _>(1..12),
         TextSummary {
-            bytes: 11,
+            len: 11,
+            len_utf16: 11,
             lines: Point::new(3, 0),
             lines_utf16: PointUtf16::new(3, 0),
             first_line_chars: 1,
@@ -271,7 +273,8 @@ fn test_text_summary_for_range() {
     assert_eq!(
         buffer.text_summary_for_range::<TextSummary, _>(0..20),
         TextSummary {
-            bytes: 20,
+            len: 20,
+            len_utf16: 20,
             lines: Point::new(4, 1),
             lines_utf16: PointUtf16::new(4, 1),
             first_line_chars: 2,
@@ -283,7 +286,8 @@ fn test_text_summary_for_range() {
     assert_eq!(
         buffer.text_summary_for_range::<TextSummary, _>(0..22),
         TextSummary {
-            bytes: 22,
+            len: 22,
+            len_utf16: 22,
             lines: Point::new(4, 3),
             lines_utf16: PointUtf16::new(4, 3),
             first_line_chars: 2,
@@ -295,7 +299,8 @@ fn test_text_summary_for_range() {
     assert_eq!(
         buffer.text_summary_for_range::<TextSummary, _>(7..22),
         TextSummary {
-            bytes: 15,
+            len: 15,
+            len_utf16: 15,
             lines: Point::new(2, 3),
             lines_utf16: PointUtf16::new(2, 3),
             first_line_chars: 4,

crates/text/src/text.rs 🔗

@@ -2,6 +2,7 @@ mod anchor;
 pub mod locator;
 #[cfg(any(test, feature = "test-support"))]
 pub mod network;
+mod offset_utf16;
 pub mod operation_queue;
 mod patch;
 mod point;
@@ -20,6 +21,7 @@ use clock::ReplicaId;
 use collections::{HashMap, HashSet};
 use lazy_static::lazy_static;
 use locator::Locator;
+pub use offset_utf16::*;
 use operation_queue::OperationQueue;
 pub use patch::Patch;
 pub use point::*;
@@ -1621,6 +1623,14 @@ impl BufferSnapshot {
         self.visible_text.point_utf16_to_point(point)
     }
 
+    pub fn offset_utf16_to_offset(&self, offset: OffsetUtf16) -> usize {
+        self.visible_text.offset_utf16_to_offset(offset)
+    }
+
+    pub fn offset_to_offset_utf16(&self, offset: usize) -> OffsetUtf16 {
+        self.visible_text.offset_to_offset_utf16(offset)
+    }
+
     pub fn offset_to_point(&self, offset: usize) -> Point {
         self.visible_text.offset_to_point(offset)
     }
@@ -2423,6 +2433,12 @@ impl ToOffset for usize {
     }
 }
 
+impl ToOffset for OffsetUtf16 {
+    fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
+        snapshot.offset_utf16_to_offset(*self)
+    }
+}
+
 impl ToOffset for Anchor {
     fn to_offset<'a>(&self, snapshot: &BufferSnapshot) -> usize {
         snapshot.summary_for_anchor(self)
@@ -2491,6 +2507,28 @@ impl ToPointUtf16 for Point {
     }
 }
 
+pub trait ToOffsetUtf16 {
+    fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16;
+}
+
+impl ToOffsetUtf16 for Anchor {
+    fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
+        snapshot.summary_for_anchor(self)
+    }
+}
+
+impl ToOffsetUtf16 for usize {
+    fn to_offset_utf16<'a>(&self, snapshot: &BufferSnapshot) -> OffsetUtf16 {
+        snapshot.offset_to_offset_utf16(*self)
+    }
+}
+
+impl ToOffsetUtf16 for OffsetUtf16 {
+    fn to_offset_utf16<'a>(&self, _snapshot: &BufferSnapshot) -> OffsetUtf16 {
+        *self
+    }
+}
+
 pub trait Clip {
     fn clip(&self, bias: Bias, snapshot: &BufferSnapshot) -> Self;
 }