From c31a233aadad2811cec71d95b01be990e363bc88 Mon Sep 17 00:00:00 2001 From: Antonio Scandurra Date: Fri, 17 Jun 2022 11:45:26 +0200 Subject: [PATCH] Introduce a new `fingerprint` field to `TextSummary` This is calculated in `Rope` and uses the `bromberg_sl2` homomorphic hash function to determine the fingerprint of a single chunk and compose each chunk fingerprint into a single fingerprint for the entire rope that is equivalent to hashing all the rope's bytes at once. --- Cargo.lock | 20 ++++++++++++++++++++ crates/editor/src/display_map/fold_map.rs | 14 +------------- crates/editor/src/multi_buffer.rs | 10 +--------- crates/text/Cargo.toml | 2 ++ crates/text/src/rope.rs | 4 ++++ crates/text/src/tests.rs | 5 +++++ 6 files changed, 33 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 352c3310ff..374fbf85b5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -561,6 +561,18 @@ dependencies = [ "workspace", ] +[[package]] +name = "bromberg_sl2" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ed88064f69518b7e3ea50ecfc1b61d43f19248618a377b95ae5c8b611134d4d" +dependencies = [ + "digest 0.9.0", + "lazy_static", + "rayon", + "seq-macro", +] + [[package]] name = "bstr" version = "0.2.17" @@ -4156,6 +4168,12 @@ dependencies = [ "pest", ] +[[package]] +name = "seq-macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a9f47faea3cad316faa914d013d24f471cd90bfca1a0c70f05a3f42c6441e99" + [[package]] name = "serde" version = "1.0.137" @@ -4806,9 +4824,11 @@ version = "0.1.0" dependencies = [ "anyhow", "arrayvec 0.7.2", + "bromberg_sl2", "clock", "collections", "ctor", + "digest 0.9.0", "env_logger", "gpui", "lazy_static", diff --git a/crates/editor/src/display_map/fold_map.rs b/crates/editor/src/display_map/fold_map.rs index 3f9be8ad2d..16e4915d10 100644 --- a/crates/editor/src/display_map/fold_map.rs +++ b/crates/editor/src/display_map/fold_map.rs @@ -370,22 +370,10 @@ impl FoldMap { if fold.end > fold.start { let output_text = "…"; - let chars = output_text.chars().count() as u32; - let lines = Point::new(0, output_text.len() as u32); - let lines_utf16 = - PointUtf16::new(0, output_text.encode_utf16().count() as u32); new_transforms.push( Transform { summary: TransformSummary { - output: TextSummary { - bytes: output_text.len(), - lines, - lines_utf16, - first_line_chars: chars, - last_line_chars: chars, - longest_row: 0, - longest_row_chars: chars, - }, + output: TextSummary::from(output_text), input: new_buffer.text_summary_for_range(fold.start..fold.end), }, output_text: Some(output_text), diff --git a/crates/editor/src/multi_buffer.rs b/crates/editor/src/multi_buffer.rs index 88bfe28a27..2c7023d37e 100644 --- a/crates/editor/src/multi_buffer.rs +++ b/crates/editor/src/multi_buffer.rs @@ -1923,15 +1923,7 @@ impl MultiBufferSnapshot { ); if range.end > end_before_newline { - summary.add_assign(&D::from_text_summary(&TextSummary { - bytes: 1, - lines: Point::new(1 as u32, 0), - lines_utf16: PointUtf16::new(1 as u32, 0), - first_line_chars: 0, - last_line_chars: 0, - longest_row: 0, - longest_row_chars: 0, - })); + summary.add_assign(&D::from_text_summary(&TextSummary::from("\n"))); } cursor.next(&()); diff --git a/crates/text/Cargo.toml b/crates/text/Cargo.toml index 5f4bb4715d..cbbd65027e 100644 --- a/crates/text/Cargo.toml +++ b/crates/text/Cargo.toml @@ -16,6 +16,8 @@ collections = { path = "../collections" } sum_tree = { path = "../sum_tree" } anyhow = "1.0.38" arrayvec = "0.7.1" +digest = { version = "0.9", features = ["std"] } +bromberg_sl2 = "0.6" lazy_static = "1.4" log = { version = "0.4.16", features = ["kv_unstable_serde"] } parking_lot = "0.11" diff --git a/crates/text/src/rope.rs b/crates/text/src/rope.rs index ffb3439f3e..cb81752bce 100644 --- a/crates/text/src/rope.rs +++ b/crates/text/src/rope.rs @@ -2,6 +2,7 @@ use crate::PointUtf16; use super::Point; use arrayvec::ArrayString; +use bromberg_sl2::HashMatrix; use smallvec::SmallVec; use std::{cmp, fmt, io, mem, ops::Range, str}; use sum_tree::{Bias, Dimension, SumTree}; @@ -725,6 +726,7 @@ pub struct TextSummary { pub last_line_chars: u32, pub longest_row: u32, pub longest_row_chars: u32, + pub fingerprint: HashMatrix, } impl<'a> From<&'a str> for TextSummary { @@ -764,6 +766,7 @@ impl<'a> From<&'a str> for TextSummary { last_line_chars, longest_row, longest_row_chars, + fingerprint: bromberg_sl2::hash_strict(text.as_bytes()), } } } @@ -810,6 +813,7 @@ impl<'a> std::ops::AddAssign<&'a Self> for TextSummary { self.bytes += other.bytes; self.lines += other.lines; self.lines_utf16 += other.lines_utf16; + self.fingerprint = self.fingerprint * other.fingerprint; } } diff --git a/crates/text/src/tests.rs b/crates/text/src/tests.rs index e66837f21b..ee260afa6e 100644 --- a/crates/text/src/tests.rs +++ b/crates/text/src/tests.rs @@ -226,6 +226,7 @@ fn test_text_summary_for_range() { last_line_chars: 0, longest_row: 0, longest_row_chars: 1, + fingerprint: bromberg_sl2::hash_strict(b"b\n") } ); assert_eq!( @@ -238,6 +239,7 @@ fn test_text_summary_for_range() { last_line_chars: 0, longest_row: 2, longest_row_chars: 4, + fingerprint: bromberg_sl2::hash_strict(b"b\nefg\nhklm\n") } ); assert_eq!( @@ -250,6 +252,7 @@ fn test_text_summary_for_range() { last_line_chars: 1, longest_row: 3, longest_row_chars: 6, + fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\nt") } ); assert_eq!( @@ -262,6 +265,7 @@ fn test_text_summary_for_range() { last_line_chars: 3, longest_row: 3, longest_row_chars: 6, + fingerprint: bromberg_sl2::hash_strict(b"ab\nefg\nhklm\nnopqrs\ntuv") } ); assert_eq!( @@ -274,6 +278,7 @@ fn test_text_summary_for_range() { last_line_chars: 3, longest_row: 1, longest_row_chars: 6, + fingerprint: bromberg_sl2::hash_strict(b"hklm\nnopqrs\ntuv") } ); }