Introduce a new `fingerprint` field to `TextSummary`

Antonio Scandurra created

The fingerprint is calculated in `Rope` using the `bromberg_sl2` homomorphic
hash function: each chunk is hashed on its own, and the per-chunk
fingerprints are then composed into a single fingerprint for the entire
rope, which is equivalent to hashing all of the rope's bytes at once.

Change summary

Cargo.lock                                | 20 ++++++++++++++++++++
crates/editor/src/display_map/fold_map.rs | 14 +-------------
crates/editor/src/multi_buffer.rs         | 10 +---------
crates/text/Cargo.toml                    |  2 ++
crates/text/src/rope.rs                   |  4 ++++
crates/text/src/tests.rs                  |  5 +++++
6 files changed, 33 insertions(+), 22 deletions(-)

Detailed changes

Cargo.lock 🔗

@@ -561,6 +561,18 @@ dependencies = [
  "workspace",
 ]
 
+[[package]]
+name = "bromberg_sl2"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2ed88064f69518b7e3ea50ecfc1b61d43f19248618a377b95ae5c8b611134d4d"
+dependencies = [
+ "digest 0.9.0",
+ "lazy_static",
+ "rayon",
+ "seq-macro",
+]
+
 [[package]]
 name = "bstr"
 version = "0.2.17"
@@ -4156,6 +4168,12 @@ dependencies = [
  "pest",
 ]
 
+[[package]]
+name = "seq-macro"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99"
+
 [[package]]
 name = "serde"
 version = "1.0.137"
@@ -4806,9 +4824,11 @@ version = "0.1.0"
 dependencies = [
  "anyhow",
  "arrayvec 0.7.2",
+ "bromberg_sl2",
  "clock",
  "collections",
  "ctor",
+ "digest 0.9.0",
  "env_logger",
  "gpui",
  "lazy_static",

crates/editor/src/display_map/fold_map.rs 🔗

@@ -370,22 +370,10 @@ impl FoldMap {
 
                     if fold.end > fold.start {
                         let output_text = "…";
-                        let chars = output_text.chars().count() as u32;
-                        let lines = Point::new(0, output_text.len() as u32);
-                        let lines_utf16 =
-                            PointUtf16::new(0, output_text.encode_utf16().count() as u32);
                         new_transforms.push(
                             Transform {
                                 summary: TransformSummary {
-                                    output: TextSummary {
-                                        bytes: output_text.len(),
-                                        lines,
-                                        lines_utf16,
-                                        first_line_chars: chars,
-                                        last_line_chars: chars,
-                                        longest_row: 0,
-                                        longest_row_chars: chars,
-                                    },
+                                    output: TextSummary::from(output_text),
                                     input: new_buffer.text_summary_for_range(fold.start..fold.end),
                                 },
                                 output_text: Some(output_text),

crates/editor/src/multi_buffer.rs 🔗

@@ -1923,15 +1923,7 @@ impl MultiBufferSnapshot {
             );
 
             if range.end > end_before_newline {
-                summary.add_assign(&D::from_text_summary(&TextSummary {
-                    bytes: 1,
-                    lines: Point::new(1 as u32, 0),
-                    lines_utf16: PointUtf16::new(1 as u32, 0),
-                    first_line_chars: 0,
-                    last_line_chars: 0,
-                    longest_row: 0,
-                    longest_row_chars: 0,
-                }));
+                summary.add_assign(&D::from_text_summary(&TextSummary::from("\n")));
             }
 
             cursor.next(&());

crates/text/Cargo.toml 🔗

@@ -16,6 +16,8 @@ collections = { path = "../collections" }
 sum_tree = { path = "../sum_tree" }
 anyhow = "1.0.38"
 arrayvec = "0.7.1"
+digest = { version = "0.9", features = ["std"] }
+bromberg_sl2 = "0.6"
 lazy_static = "1.4"
 log = { version = "0.4.16", features = ["kv_unstable_serde"] }
 parking_lot = "0.11"

crates/text/src/rope.rs 🔗

@@ -2,6 +2,7 @@ use crate::PointUtf16;
 
 use super::Point;
 use arrayvec::ArrayString;
+use bromberg_sl2::HashMatrix;
 use smallvec::SmallVec;
 use std::{cmp, fmt, io, mem, ops::Range, str};
 use sum_tree::{Bias, Dimension, SumTree};
@@ -725,6 +726,7 @@ pub struct TextSummary {
     pub last_line_chars: u32,
     pub longest_row: u32,
     pub longest_row_chars: u32,
+    pub fingerprint: HashMatrix,
 }
 
 impl<'a> From<&'a str> for TextSummary {
@@ -764,6 +766,7 @@ impl<'a> From<&'a str> for TextSummary {
             last_line_chars,
             longest_row,
             longest_row_chars,
+            fingerprint: bromberg_sl2::hash_strict(text.as_bytes()),
         }
     }
 }
@@ -810,6 +813,7 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary {
         self.bytes += other.bytes;
         self.lines += other.lines;
         self.lines_utf16 += other.lines_utf16;
+        self.fingerprint = self.fingerprint * other.fingerprint;
     }
 }
 

crates/text/src/tests.rs 🔗

@@ -226,6 +226,7 @@ fn test_text_summary_for_range() {
             last_line_chars: 0,
             longest_row: 0,
             longest_row_chars: 1,
+            fingerprint: bromberg_sl2::hash_strict(b"b\n")
         }
     );
     assert_eq!(
@@ -238,6 +239,7 @@ fn test_text_summary_for_range() {
             last_line_chars: 0,
             longest_row: 2,
             longest_row_chars: 4,
+            fingerprint: bromberg_sl2::hash_strict(b"b\nefg\nhklm\n")
         }
     );
     assert_eq!(
@@ -250,6 +252,7 @@ fn test_text_summary_for_range() {
             last_line_chars: 1,
             longest_row: 3,
             longest_row_chars: 6,
+            fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\nt")
         }
     );
     assert_eq!(
@@ -262,6 +265,7 @@ fn test_text_summary_for_range() {
             last_line_chars: 3,
             longest_row: 3,
             longest_row_chars: 6,
+            fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\ntuv")
         }
     );
     assert_eq!(
@@ -274,6 +278,7 @@ fn test_text_summary_for_range() {
             last_line_chars: 3,
             longest_row: 1,
             longest_row_chars: 6,
+            fingerprint: bromberg_sl2::hash_strict(b"hklm\nnopqrs\ntuv")
         }
     );
 }