Revert "language: Remove buffer fingerprinting (#9007)"

Conrad Irwin created

This reverts commit 6f2f61c9b1bf7a7285ba2a27d10c4c7a6022e670.

Change summary

Cargo.lock                                                    | 18 ++
crates/collab/src/tests/random_project_collaboration_tests.rs | 10 
crates/copilot/src/copilot.rs                                 |  1 
crates/editor/src/items.rs                                    |  3 
crates/language/src/buffer.rs                                 | 38 ++++
crates/language/src/proto.rs                                  | 11 +
crates/project/src/project.rs                                 | 14 +
crates/project/src/project_tests.rs                           |  7 
crates/rope/Cargo.toml                                        |  1 
crates/rope/src/rope.rs                                       | 18 ++
crates/rpc/proto/zed.proto                                    |  6 
crates/worktree/src/worktree.rs                               | 17 +
12 files changed, 122 insertions(+), 22 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -1567,6 +1567,17 @@ dependencies = [
  "workspace",
 ]
 
+[[package]]
+name = "bromberg_sl2"
+version = "0.6.0"
+source = "git+https://github.com/zed-industries/bromberg_sl2?rev=950bc5482c216c395049ae33ae4501e08975f17f#950bc5482c216c395049ae33ae4501e08975f17f"
+dependencies = [
+ "digest 0.9.0",
+ "lazy_static",
+ "rayon",
+ "seq-macro",
+]
+
 [[package]]
 name = "bstr"
 version = "1.6.2"
@@ -7746,6 +7757,7 @@ name = "rope"
 version = "0.1.0"
 dependencies = [
  "arrayvec 0.7.4",
+ "bromberg_sl2",
  "gpui",
  "log",
  "rand 0.8.5",
@@ -8325,6 +8337,12 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918"
 
+[[package]]
+name = "seq-macro"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
+
 [[package]]
 name = "serde"
 version = "1.0.196"

crates/collab/src/tests/random_project_collaboration_tests.rs 🔗

@@ -1347,11 +1347,13 @@ impl RandomizedTest for ProjectCollaborationTest {
                             client.username
                         );
 
-                    let host_is_dirty = host_buffer.read_with(host_cx, |b, _| b.is_dirty());
-                    let guest_is_dirty = guest_buffer.read_with(client_cx, |b, _| b.is_dirty());
+                    let host_saved_version_fingerprint =
+                        host_buffer.read_with(host_cx, |b, _| b.saved_version_fingerprint());
+                    let guest_saved_version_fingerprint =
+                        guest_buffer.read_with(client_cx, |b, _| b.saved_version_fingerprint());
                     assert_eq!(
-                            guest_is_dirty, host_is_dirty,
-                            "guest {} dirty state does not match host's for path {path:?} in project {project_id}",
+                            guest_saved_version_fingerprint, host_saved_version_fingerprint,
+                            "guest {} saved fingerprint does not match host's for path {path:?} in project {project_id}",
                             client.username
                         );
 

crates/copilot/src/copilot.rs 🔗

@@ -1263,6 +1263,7 @@ mod tests {
             &self,
             _: BufferId,
             _: &clock::Global,
+            _: language::RopeFingerprint,
             _: language::LineEnding,
             _: Option<std::time::SystemTime>,
             _: &mut AppContext,

crates/editor/src/items.rs 🔗

@@ -730,8 +730,9 @@ impl Item for Editor {
                     buffer
                         .update(&mut cx, |buffer, cx| {
                             let version = buffer.saved_version().clone();
+                            let fingerprint = buffer.saved_version_fingerprint();
                             let mtime = buffer.saved_mtime();
-                            buffer.did_save(version, mtime, cx);
+                            buffer.did_save(version, fingerprint, mtime, cx);
                         })
                         .ok();
                 }

crates/language/src/buffer.rs 🔗

@@ -45,9 +45,9 @@ use text::operation_queue::OperationQueue;
 use text::*;
 pub use text::{
     Anchor, Bias, Buffer as TextBuffer, BufferId, BufferSnapshot as TextBufferSnapshot, Edit,
-    OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, Selection, SelectionGoal,
-    Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint, ToPointUtf16,
-    Transaction, TransactionId, Unclipped,
+    OffsetRangeExt, OffsetUtf16, Patch, Point, PointUtf16, Rope, RopeFingerprint, Selection,
+    SelectionGoal, Subscription, TextDimension, TextSummary, ToOffset, ToOffsetUtf16, ToPoint,
+    ToPointUtf16, Transaction, TransactionId, Unclipped,
 };
 use theme::SyntaxTheme;
 #[cfg(any(test, feature = "test-support"))]
@@ -87,6 +87,8 @@ pub struct Buffer {
     /// The version vector when this buffer was last loaded from
     /// or saved to disk.
     saved_version: clock::Global,
+    /// A hash of the current contents of the buffer's file.
+    file_fingerprint: RopeFingerprint,
     transaction_depth: usize,
     was_dirty_before_starting_transaction: Option<bool>,
     reload_task: Option<Task<Result<()>>>,
@@ -405,6 +407,7 @@ pub trait LocalFile: File {
         &self,
         buffer_id: BufferId,
         version: &clock::Global,
+        fingerprint: RopeFingerprint,
         line_ending: LineEnding,
         mtime: Option<SystemTime>,
         cx: &mut AppContext,
@@ -574,6 +577,7 @@ impl Buffer {
                 .ok_or_else(|| anyhow!("missing line_ending"))?,
         ));
         this.saved_version = proto::deserialize_version(&message.saved_version);
+        this.file_fingerprint = proto::deserialize_fingerprint(&message.saved_version_fingerprint)?;
         this.saved_mtime = message.saved_mtime.map(|time| time.into());
         Ok(this)
     }
@@ -587,6 +591,7 @@ impl Buffer {
             diff_base: self.diff_base.as_ref().map(|h| h.to_string()),
             line_ending: proto::serialize_line_ending(self.line_ending()) as i32,
             saved_version: proto::serialize_version(&self.saved_version),
+            saved_version_fingerprint: proto::serialize_fingerprint(self.file_fingerprint),
             saved_mtime: self.saved_mtime.map(|time| time.into()),
         }
     }
@@ -666,6 +671,7 @@ impl Buffer {
         Self {
             saved_mtime,
             saved_version: buffer.version(),
+            file_fingerprint: buffer.as_rope().fingerprint(),
             reload_task: None,
             transaction_depth: 0,
             was_dirty_before_starting_transaction: None,
@@ -740,6 +746,11 @@ impl Buffer {
         &self.saved_version
     }
 
+    /// The fingerprint of the buffer's text when the buffer was last saved or reloaded from disk.
+    pub fn saved_version_fingerprint(&self) -> RopeFingerprint {
+        self.file_fingerprint
+    }
+
     /// The mtime of the buffer's file when the buffer was last saved or reloaded from disk.
     pub fn saved_mtime(&self) -> Option<SystemTime> {
         self.saved_mtime
@@ -772,11 +783,13 @@ impl Buffer {
     pub fn did_save(
         &mut self,
         version: clock::Global,
+        fingerprint: RopeFingerprint,
         mtime: Option<SystemTime>,
         cx: &mut ModelContext<Self>,
     ) {
         self.saved_version = version;
         self.has_conflict = false;
+        self.file_fingerprint = fingerprint;
         self.saved_mtime = mtime;
         cx.emit(Event::Saved);
         cx.notify();
@@ -808,7 +821,13 @@ impl Buffer {
                     this.apply_diff(diff, cx);
                     tx.send(this.finalize_last_transaction().cloned()).ok();
                     this.has_conflict = false;
-                    this.did_reload(this.version(), this.line_ending(), new_mtime, cx);
+                    this.did_reload(
+                        this.version(),
+                        this.as_rope().fingerprint(),
+                        this.line_ending(),
+                        new_mtime,
+                        cx,
+                    );
                 } else {
                     if !diff.edits.is_empty()
                         || this
@@ -819,7 +838,13 @@ impl Buffer {
                         this.has_conflict = true;
                     }
 
-                    this.did_reload(prev_version, this.line_ending(), this.saved_mtime, cx);
+                    this.did_reload(
+                        prev_version,
+                        Rope::text_fingerprint(&new_text),
+                        this.line_ending(),
+                        this.saved_mtime,
+                        cx,
+                    );
                 }
 
                 this.reload_task.take();
@@ -832,17 +857,20 @@ impl Buffer {
     pub fn did_reload(
         &mut self,
         version: clock::Global,
+        fingerprint: RopeFingerprint,
         line_ending: LineEnding,
         mtime: Option<SystemTime>,
         cx: &mut ModelContext<Self>,
     ) {
         self.saved_version = version;
+        self.file_fingerprint = fingerprint;
         self.text.set_line_ending(line_ending);
         self.saved_mtime = mtime;
         if let Some(file) = self.file.as_ref().and_then(|f| f.as_local()) {
             file.buffer_reloaded(
                 self.remote_id(),
                 &self.saved_version,
+                self.file_fingerprint,
                 self.line_ending(),
                 self.saved_mtime,
                 cx,

crates/language/src/proto.rs 🔗

@@ -13,6 +13,17 @@ use text::*;
 
 pub use proto::{BufferState, Operation};
 
+/// Serializes a [`RopeFingerprint`] to be sent over RPC.
+pub fn serialize_fingerprint(fingerprint: RopeFingerprint) -> String {
+    fingerprint.to_hex()
+}
+
+/// Deserializes a [`RopeFingerprint`] from the RPC representation.
+pub fn deserialize_fingerprint(fingerprint: &str) -> Result<RopeFingerprint> {
+    RopeFingerprint::from_hex(fingerprint)
+        .map_err(|error| anyhow!("invalid fingerprint: {}", error))
+}
+
 /// Deserializes a `[text::LineEnding]` from the RPC representation.
 pub fn deserialize_line_ending(message: proto::LineEnding) -> text::LineEnding {
     match message {

crates/project/src/project.rs 🔗

@@ -37,8 +37,8 @@ use language::{
     language_settings::{language_settings, FormatOnSave, Formatter, InlayHintKind},
     markdown, point_to_lsp,
     proto::{
-        deserialize_anchor, deserialize_line_ending, deserialize_version, serialize_anchor,
-        serialize_version, split_operations,
+        deserialize_anchor, deserialize_fingerprint, deserialize_line_ending, deserialize_version,
+        serialize_anchor, serialize_version, split_operations,
     },
     range_from_lsp, Bias, Buffer, BufferSnapshot, CachedLspAdapter, Capability, CodeAction,
     CodeLabel, Completion, Diagnostic, DiagnosticEntry, DiagnosticSet, Diff, Documentation,
@@ -7922,6 +7922,7 @@ impl Project {
             buffer_id: buffer_id.into(),
             version: serialize_version(buffer.saved_version()),
             mtime: buffer.saved_mtime().map(|time| time.into()),
+            fingerprint: language::proto::serialize_fingerprint(buffer.saved_version_fingerprint()),
         })
     }
 
@@ -8014,6 +8015,9 @@ impl Project {
                             buffer_id: buffer_id.into(),
                             version: language::proto::serialize_version(buffer.saved_version()),
                             mtime: buffer.saved_mtime().map(|time| time.into()),
+                            fingerprint: language::proto::serialize_fingerprint(
+                                buffer.saved_version_fingerprint(),
+                            ),
                             line_ending: language::proto::serialize_line_ending(
                                 buffer.line_ending(),
                             ) as i32,
@@ -8890,6 +8894,7 @@ impl Project {
         _: Arc<Client>,
         mut cx: AsyncAppContext,
     ) -> Result<()> {
+        let fingerprint = deserialize_fingerprint(&envelope.payload.fingerprint)?;
         let version = deserialize_version(&envelope.payload.version);
         let buffer_id = BufferId::new(envelope.payload.buffer_id)?;
         let mtime = envelope.payload.mtime.map(|time| time.into());
@@ -8906,7 +8911,7 @@ impl Project {
                 });
             if let Some(buffer) = buffer {
                 buffer.update(cx, |buffer, cx| {
-                    buffer.did_save(version, mtime, cx);
+                    buffer.did_save(version, fingerprint, mtime, cx);
                 });
             }
             Ok(())
@@ -8921,6 +8926,7 @@ impl Project {
     ) -> Result<()> {
         let payload = envelope.payload;
         let version = deserialize_version(&payload.version);
+        let fingerprint = deserialize_fingerprint(&payload.fingerprint)?;
         let line_ending = deserialize_line_ending(
             proto::LineEnding::from_i32(payload.line_ending)
                 .ok_or_else(|| anyhow!("missing line ending"))?,
@@ -8940,7 +8946,7 @@ impl Project {
                 });
             if let Some(buffer) = buffer {
                 buffer.update(cx, |buffer, cx| {
-                    buffer.did_reload(version, line_ending, mtime, cx);
+                    buffer.did_reload(version, fingerprint, line_ending, mtime, cx);
                 });
             }
             Ok(())

crates/project/src/project_tests.rs 🔗

@@ -3118,7 +3118,12 @@ async fn test_buffer_is_dirty(cx: &mut gpui::TestAppContext) {
             &[language::Event::Edited, language::Event::DirtyChanged]
         );
         events.lock().clear();
-        buffer.did_save(buffer.version(), buffer.file().unwrap().mtime(), cx);
+        buffer.did_save(
+            buffer.version(),
+            buffer.as_rope().fingerprint(),
+            buffer.file().unwrap().mtime(),
+            cx,
+        );
     });
 
     // after saving, the buffer is not dirty, and emits a saved event.

crates/rope/Cargo.toml 🔗

@@ -13,6 +13,7 @@ path = "src/rope.rs"
 
 [dependencies]
 arrayvec = "0.7.1"
+bromberg_sl2 = { git = "https://github.com/zed-industries/bromberg_sl2", rev = "950bc5482c216c395049ae33ae4501e08975f17f" }
 log.workspace = true
 smallvec.workspace = true
 sum_tree.workspace = true

crates/rope/src/rope.rs 🔗

@@ -4,6 +4,7 @@ mod point_utf16;
 mod unclipped;
 
 use arrayvec::ArrayString;
+use bromberg_sl2::HashMatrix;
 use smallvec::SmallVec;
 use std::{
     cmp, fmt, io, mem,
@@ -24,6 +25,12 @@ const CHUNK_BASE: usize = 6;
 #[cfg(not(test))]
 const CHUNK_BASE: usize = 16;
 
+/// Type alias to [`HashMatrix`], an implementation of a homomorphic hash function. Two [`Rope`] instances
+/// containing the same text will produce the same fingerprint. This hash function is special in that
+/// it allows us to hash individual chunks and aggregate them up the [`Rope`]'s tree, with the resulting
+/// hash being equivalent to hashing all the text contained in the [`Rope`] at once.
+pub type RopeFingerprint = HashMatrix;
+
 #[derive(Clone, Default)]
 pub struct Rope {
     chunks: SumTree<Chunk>,
@@ -34,6 +41,10 @@ impl Rope {
         Self::default()
     }
 
+    pub fn text_fingerprint(text: &str) -> RopeFingerprint {
+        bromberg_sl2::hash_strict(text.as_bytes())
+    }
+
     pub fn append(&mut self, rope: Rope) {
         let mut chunks = rope.chunks.cursor::<()>();
         chunks.next(&());
@@ -412,6 +423,10 @@ impl Rope {
         self.clip_point(Point::new(row, u32::MAX), Bias::Left)
             .column
     }
+
+    pub fn fingerprint(&self) -> RopeFingerprint {
+        self.chunks.summary().fingerprint
+    }
 }
 
 impl<'a> From<&'a str> for Rope {
@@ -962,12 +977,14 @@ impl sum_tree::Item for Chunk {
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 pub struct ChunkSummary {
     text: TextSummary,
+    fingerprint: RopeFingerprint,
 }
 
 impl<'a> From<&'a str> for ChunkSummary {
     fn from(text: &'a str) -> Self {
         Self {
             text: TextSummary::from(text),
+            fingerprint: Rope::text_fingerprint(text),
         }
     }
 }
@@ -977,6 +994,7 @@ impl sum_tree::Summary for ChunkSummary {
 
     fn add_summary(&mut self, summary: &Self, _: &()) {
         self.text += &summary.text;
+        self.fingerprint = self.fingerprint * summary.fingerprint;
     }
 }
 

crates/rpc/proto/zed.proto 🔗

@@ -674,7 +674,7 @@ message BufferSaved {
     uint64 buffer_id = 2;
     repeated VectorClockEntry version = 3;
     Timestamp mtime = 4;
-    reserved 5;
+    string fingerprint = 5;
 }
 
 message BufferReloaded {
@@ -682,7 +682,7 @@ message BufferReloaded {
     uint64 buffer_id = 2;
     repeated VectorClockEntry version = 3;
     Timestamp mtime = 4;
-    reserved 5;
+    string fingerprint = 5;
     LineEnding line_ending = 6;
 }
 
@@ -1502,7 +1502,7 @@ message BufferState {
     optional string diff_base = 4;
     LineEnding line_ending = 5;
     repeated VectorClockEntry saved_version = 6;
-    reserved 7;
+    string saved_version_fingerprint = 7;
     Timestamp saved_mtime = 8;
 }
 

crates/worktree/src/worktree.rs 🔗

@@ -31,8 +31,12 @@ use gpui::{
 use ignore::IgnoreStack;
 use itertools::Itertools;
 use language::{
-    proto::{deserialize_version, serialize_line_ending, serialize_version},
-    Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, Unclipped,
+    proto::{
+        deserialize_fingerprint, deserialize_version, serialize_fingerprint, serialize_line_ending,
+        serialize_version,
+    },
+    Buffer, Capability, DiagnosticEntry, File as _, LineEnding, PointUtf16, Rope, RopeFingerprint,
+    Unclipped,
 };
 use lsp::{DiagnosticSeverity, LanguageServerId};
 use parking_lot::Mutex;
@@ -1147,6 +1151,7 @@ impl LocalWorktree {
         }
 
         let text = buffer.as_rope().clone();
+        let fingerprint = text.fingerprint();
         let version = buffer.version();
         let save = self.write_file(path.as_ref(), text, buffer.line_ending(), cx);
         let fs = Arc::clone(&self.fs);
@@ -1209,11 +1214,12 @@ impl LocalWorktree {
                     buffer_id,
                     version: serialize_version(&version),
                     mtime: mtime.map(|time| time.into()),
+                    fingerprint: serialize_fingerprint(fingerprint),
                 })?;
             }
 
             buffer_handle.update(&mut cx, |buffer, cx| {
-                buffer.did_save(version.clone(), mtime, cx);
+                buffer.did_save(version.clone(), fingerprint, mtime, cx);
             })?;
 
             Ok(())
@@ -1614,10 +1620,11 @@ impl RemoteWorktree {
                 })
                 .await?;
             let version = deserialize_version(&response.version);
+            let fingerprint = deserialize_fingerprint(&response.fingerprint)?;
             let mtime = response.mtime.map(|mtime| mtime.into());
 
             buffer_handle.update(&mut cx, |buffer, cx| {
-                buffer.did_save(version.clone(), mtime, cx);
+                buffer.did_save(version.clone(), fingerprint, mtime, cx);
             })?;
 
             Ok(())
@@ -2999,6 +3006,7 @@ impl language::LocalFile for File {
         &self,
         buffer_id: BufferId,
         version: &clock::Global,
+        fingerprint: RopeFingerprint,
         line_ending: LineEnding,
         mtime: Option<SystemTime>,
         cx: &mut AppContext,
@@ -3012,6 +3020,7 @@ impl language::LocalFile for File {
                     buffer_id: buffer_id.into(),
                     version: serialize_version(version),
                     mtime: mtime.map(|time| time.into()),
+                    fingerprint: serialize_fingerprint(fingerprint),
                     line_ending: serialize_line_ending(line_ending) as i32,
                 })
                 .log_err();