WIP - Serialize buffer in terms of its state, not its base text + ops

Max Brunsfeld created

The main reason for this is that we need to include information about
a buffer's UndoMap in its protobuf representation. But it's a bit
complex to correctly incorporate this information into the current
protobuf representation.

If we want to continue reusing `Buffer::apply_remote_edit` for
incorporating the historical operations, we need to either make
that method capable of incorporating already-undone edits, or
serialize the UndoMap into undo *operations*, so that we can apply
these undo operations after the fact when deserializing. But this is
not trivial, because an UndoOperation requires information about
the full offset ranges that were undone.

Change summary

crates/language/src/buffer.rs | 40 +++++++++++++-----
crates/language/src/proto.rs  | 78 ++++++++++++++++++++++++++++++++++++
crates/language/src/tests.rs  | 19 +++++++-
crates/rpc/proto/zed.proto    | 38 +++++++++++++----
crates/rpc/src/peer.rs        | 16 +-----
crates/text/src/text.rs       | 75 +++++++++++++++++++++++++++++++---
6 files changed, 222 insertions(+), 44 deletions(-)

Detailed changes

crates/language/src/buffer.rs 🔗

@@ -287,13 +287,24 @@ impl Buffer {
         file: Option<Box<dyn File>>,
         cx: &mut ModelContext<Self>,
     ) -> Result<Self> {
-        let mut buffer =
-            text::Buffer::new(replica_id, message.id, History::new(message.content.into()));
-        let ops = message
-            .history
-            .into_iter()
-            .map(|op| text::Operation::Edit(proto::deserialize_edit_operation(op)));
-        buffer.apply_ops(ops)?;
+        let mut fragments_len = message.fragments.len();
+        let buffer = TextBuffer::from_parts(
+            replica_id,
+            message.id,
+            message.content,
+            message.deleted_content,
+            message
+                .undo_map
+                .into_iter()
+                .map(proto::deserialize_undo_map_entry),
+            message
+                .fragments
+                .into_iter()
+                .enumerate()
+                .map(|(i, fragment)| {
+                    proto::deserialize_buffer_fragment(fragment, i, fragments_len)
+                }),
+        );
         let mut this = Self::build(buffer, file);
         for selection_set in message.selections {
             this.remote_selections.insert(
@@ -320,11 +331,18 @@ impl Buffer {
     pub fn to_proto(&self) -> proto::Buffer {
         proto::Buffer {
             id: self.remote_id(),
-            content: self.text.base_text().to_string(),
-            history: self
+            content: self.text.text(),
+            deleted_content: self.text.deleted_text(),
+            undo_map: self
+                .text
+                .undo_history()
+                .map(proto::serialize_undo_map_entry)
+                .collect(),
+            version: proto::serialize_vector_clock(&self.version),
+            fragments: self
                 .text
-                .history()
-                .map(proto::serialize_edit_operation)
+                .fragments()
+                .map(proto::serialize_buffer_fragment)
                 .collect(),
             selections: self
                 .remote_selections

crates/language/src/proto.rs 🔗

@@ -32,7 +32,7 @@ pub fn serialize_operation(operation: &Operation) -> proto::Operation {
                 counts: undo
                     .counts
                     .iter()
-                    .map(|(edit_id, count)| proto::operation::UndoCount {
+                    .map(|(edit_id, count)| proto::UndoCount {
                         replica_id: edit_id.replica_id as u32,
                         local_timestamp: edit_id.value,
                         count: *count,
@@ -91,6 +91,50 @@ pub fn serialize_edit_operation(operation: &EditOperation) -> proto::operation::
     }
 }
 
+pub fn serialize_undo_map_entry(
+    (edit_id, counts): (&clock::Local, &[(clock::Local, u32)]),
+) -> proto::UndoMapEntry {
+    proto::UndoMapEntry {
+        replica_id: edit_id.replica_id as u32,
+        local_timestamp: edit_id.value,
+        counts: counts
+            .iter()
+            .map(|(undo_id, count)| proto::UndoCount {
+                replica_id: undo_id.replica_id as u32,
+                local_timestamp: undo_id.value,
+                count: *count,
+            })
+            .collect(),
+    }
+}
+
+pub fn serialize_buffer_fragment(fragment: &text::Fragment) -> proto::BufferFragment {
+    proto::BufferFragment {
+        replica_id: fragment.insertion_timestamp.replica_id as u32,
+        local_timestamp: fragment.insertion_timestamp.local,
+        lamport_timestamp: fragment.insertion_timestamp.lamport,
+        insertion_offset: fragment.insertion_offset as u32,
+        len: fragment.len as u32,
+        visible: fragment.visible,
+        deletions: fragment
+            .deletions
+            .iter()
+            .map(|clock| proto::VectorClockEntry {
+                replica_id: clock.replica_id as u32,
+                timestamp: clock.value,
+            })
+            .collect(),
+        max_undos: fragment
+            .max_undos
+            .iter()
+            .map(|clock| proto::VectorClockEntry {
+                replica_id: clock.replica_id as u32,
+                timestamp: clock.value,
+            })
+            .collect(),
+    }
+}
+
 pub fn serialize_selections(selections: &Arc<[Selection<Anchor>]>) -> Vec<proto::Selection> {
     selections
         .iter()
@@ -252,6 +296,38 @@ pub fn deserialize_edit_operation(edit: proto::operation::Edit) -> EditOperation
     }
 }
 
+pub fn deserialize_undo_map_entry(
+    entry: proto::UndoMapEntry,
+) -> (clock::Local, Vec<(clock::Local, u32)>) {
+    (
+        clock::Local {
+            replica_id: entry.replica_id as u16,
+            value: entry.local_timestamp,
+        },
+        entry
+            .counts
+            .into_iter()
+            .map(|undo_count| {
+                (
+                    clock::Local {
+                        replica_id: undo_count.replica_id as u16,
+                        value: undo_count.local_timestamp,
+                    },
+                    undo_count.count,
+                )
+            })
+            .collect(),
+    )
+}
+
+pub fn deserialize_buffer_fragment(
+    message: proto::BufferFragment,
+    ix: usize,
+    count: usize,
+) -> Fragment {
+    todo!()
+}
+
 pub fn deserialize_selections(selections: Vec<proto::Selection>) -> Arc<[Selection<Anchor>]> {
     Arc::from(
         selections

crates/language/src/tests.rs 🔗

@@ -782,17 +782,30 @@ async fn test_empty_diagnostic_ranges(mut cx: gpui::TestAppContext) {
 
 #[gpui::test]
 fn test_serialization(cx: &mut gpui::MutableAppContext) {
+    let mut now = Instant::now();
+
     let buffer1 = cx.add_model(|cx| {
         let mut buffer = Buffer::new(0, "abc", cx);
-        buffer.edit([3..3], "DE", cx);
+        buffer.edit([3..3], "D", cx);
+
+        now += Duration::from_secs(1);
+        buffer.start_transaction_at(now);
+        buffer.edit([4..4], "E", cx);
+        buffer.end_transaction_at(now, cx);
+        assert_eq!(buffer.text(), "abcDE");
+
         buffer.undo(cx);
+        assert_eq!(buffer.text(), "abcD");
+
+        buffer.edit([4..4], "F", cx);
+        assert_eq!(buffer.text(), "abcDF");
         buffer
     });
-    assert_eq!(buffer1.read(cx).text(), "abc");
+    assert_eq!(buffer1.read(cx).text(), "abcDF");
 
     let message = buffer1.read(cx).to_proto();
     let buffer2 = cx.add_model(|cx| Buffer::from_proto(1, message, None, cx).unwrap());
-    assert_eq!(buffer2.read(cx).text(), "abc");
+    assert_eq!(buffer2.read(cx).text(), "abcDF");
 }
 
 fn chunks_with_diagnostics<T: ToOffset + ToPoint>(

crates/rpc/proto/zed.proto 🔗

@@ -263,9 +263,23 @@ message Entry {
 message Buffer {
     uint64 id = 1;
     string content = 2;
-    repeated Operation.Edit history = 3;
-    repeated SelectionSet selections = 4;
-    repeated DiagnosticSet diagnostic_sets = 5;
+    string deleted_content = 3;
+    repeated BufferFragment fragments = 4;
+    repeated UndoMapEntry undo_map = 5;
+    repeated VectorClockEntry version = 6;
+    repeated SelectionSet selections = 7;
+    repeated DiagnosticSet diagnostic_sets = 8;
+}
+
+message BufferFragment {
+    uint32 replica_id = 1;
+    uint32 local_timestamp = 2;
+    uint32 lamport_timestamp = 3;
+    uint32 insertion_offset = 4;
+    uint32 len = 5;
+    bool visible =  6;
+    repeated VectorClockEntry deletions = 7;
+    repeated VectorClockEntry max_undos = 8;
 }
 
 message SelectionSet {
@@ -350,12 +364,6 @@ message Operation {
         repeated UndoCount counts = 6;
     }
 
-    message UndoCount {
-        uint32 replica_id = 1;
-        uint32 local_timestamp = 2;
-        uint32 count = 3;
-    }
-
     message UpdateSelections {
         uint32 replica_id = 1;
         uint32 lamport_timestamp = 3;
@@ -368,6 +376,18 @@ message Operation {
     }
 }
 
+message UndoMapEntry {
+    uint32 replica_id = 1;
+    uint32 local_timestamp = 2;
+    repeated UndoCount counts = 3;
+}
+
+message UndoCount {
+    uint32 replica_id = 1;
+    uint32 local_timestamp = 2;
+    uint32 count = 3;
+}
+
 message VectorClockEntry {
     uint32 replica_id = 1;
     uint32 timestamp = 2;

crates/rpc/src/peer.rs 🔗

@@ -399,9 +399,7 @@ mod tests {
                     buffer: Some(proto::Buffer {
                         id: 101,
                         content: "path/one content".to_string(),
-                        history: vec![],
-                        selections: vec![],
-                        diagnostic_sets: vec![],
+                        ..Default::default()
                     }),
                 }
             );
@@ -422,9 +420,7 @@ mod tests {
                     buffer: Some(proto::Buffer {
                         id: 102,
                         content: "path/two content".to_string(),
-                        history: vec![],
-                        selections: vec![],
-                        diagnostic_sets: vec![],
+                        ..Default::default()
                     }),
                 }
             );
@@ -453,9 +449,7 @@ mod tests {
                                     buffer: Some(proto::Buffer {
                                         id: 101,
                                         content: "path/one content".to_string(),
-                                        history: vec![],
-                                        selections: vec![],
-                                        diagnostic_sets: vec![],
+                                        ..Default::default()
                                     }),
                                 }
                             }
@@ -465,9 +459,7 @@ mod tests {
                                     buffer: Some(proto::Buffer {
                                         id: 102,
                                         content: "path/two content".to_string(),
-                                        history: vec![],
-                                        selections: vec![],
-                                        diagnostic_sets: vec![],
+                                        ..Default::default()
                                     }),
                                 }
                             }

crates/text/src/text.rs 🔗

@@ -384,14 +384,14 @@ impl InsertionTimestamp {
 }
 
 #[derive(Eq, PartialEq, Clone, Debug)]
-struct Fragment {
+pub struct Fragment {
     id: Locator,
-    insertion_timestamp: InsertionTimestamp,
-    insertion_offset: usize,
-    len: usize,
-    visible: bool,
-    deletions: HashSet<clock::Local>,
-    max_undos: clock::Global,
+    pub insertion_timestamp: InsertionTimestamp,
+    pub insertion_offset: usize,
+    pub len: usize,
+    pub visible: bool,
+    pub deletions: HashSet<clock::Local>,
+    pub max_undos: clock::Global,
 }
 
 #[derive(Eq, PartialEq, Clone, Debug)]
@@ -508,6 +508,40 @@ impl Buffer {
         }
     }
 
+    pub fn from_parts(
+        replica_id: u16,
+        remote_id: u64,
+        visible_text: &str,
+        deleted_text: &str,
+        undo_map: impl Iterator<Item = (clock::Local, Vec<(clock::Local, u32)>)>,
+        fragments: impl ExactSizeIterator<Item = Fragment>,
+    ) -> Self {
+        let visible_text = visible_text.into();
+        let deleted_text = deleted_text.into();
+        let fragments = SumTree::from_iter(fragments, &None);
+        let undo_map = UndoMap(undo_map.collect());
+        Self {
+            remote_id,
+            replica_id,
+            snapshot: BufferSnapshot {
+                replica_id,
+                visible_text,
+                deleted_text,
+                undo_map,
+                fragments,
+                insertions: (),
+                version: (),
+            },
+            history: History::new("".into()),
+            deferred_ops: OperationQueue::new(),
+            deferred_replicas: Default::default(),
+            last_edit: todo!(),
+            local_clock: todo!(),
+            lamport_clock: todo!(),
+            subscriptions: Default::default(),
+        }
+    }
+
     pub fn version(&self) -> clock::Global {
         self.version.clone()
     }
@@ -1058,6 +1092,7 @@ impl Buffer {
         self.deferred_replicas.clear();
         let mut deferred_ops = Vec::new();
         for op in self.deferred_ops.drain().iter().cloned() {
+            dbg!(&self.version, &op, self.can_apply_op(&op));
             if self.can_apply_op(&op) {
                 self.apply_op(op)?;
             } else {
@@ -1120,6 +1155,13 @@ impl Buffer {
         self.history.ops.values()
     }
 
+    pub fn undo_history(&self) -> impl Iterator<Item = (&clock::Local, &[(clock::Local, u32)])> {
+        self.undo_map
+            .0
+            .iter()
+            .map(|(edit_id, undo_counts)| (edit_id, undo_counts.as_slice()))
+    }
+
     pub fn undo(&mut self) -> Option<(TransactionId, Operation)> {
         if let Some(transaction) = self.history.pop_undo().cloned() {
             let transaction_id = transaction.id;
@@ -1288,7 +1330,24 @@ impl BufferSnapshot {
     }
 
     pub fn text(&self) -> String {
-        self.text_for_range(0..self.len()).collect()
+        self.visible_text.to_string()
+    }
+
+    pub fn deleted_text(&self) -> String {
+        self.deleted_text.to_string()
+    }
+
+    pub fn fragments(&self) -> impl Iterator<Item = &Fragment> {
+        let mut cursor = self.fragments.cursor::<()>();
+        let mut started = false;
+        std::iter::from_fn(move || {
+            if started {
+                cursor.next(&None);
+            } else {
+                started = true;
+            }
+            cursor.item()
+        })
     }
 
     pub fn text_summary(&self) -> TextSummary {