diff --git a/Cargo.lock b/Cargo.lock index e7a6920e..06c232d6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -621,6 +621,7 @@ dependencies = [ "dev-utils", "flate2", "loro 0.16.2", + "rand", "serde_json", "tabled 0.15.0", "tracing", diff --git a/crates/examples/examples/sheet.rs b/crates/examples/examples/sheet.rs index f25cda58..9827875b 100644 --- a/crates/examples/examples/sheet.rs +++ b/crates/examples/examples/sheet.rs @@ -1,4 +1,4 @@ -use dev_utils::get_mem_usage; +use dev_utils::{get_mem_usage, ByteSize}; use examples::sheet::init_large_sheet; use loro::ID; @@ -59,4 +59,7 @@ pub fn main() { doc.check_state_correctness_slow(); let after_checkout = get_mem_usage(); println!("Allocated bytes after checkout: {}", after_checkout); + + let snapshot = doc.export_snapshot(); + println!("Snapshot size: {}", ByteSize(snapshot.len())); } diff --git a/crates/fractional_index/src/lib.rs b/crates/fractional_index/src/lib.rs index 984ca550..d7b3835a 100644 --- a/crates/fractional_index/src/lib.rs +++ b/crates/fractional_index/src/lib.rs @@ -10,7 +10,7 @@ mod jitter; const TERMINATOR: u8 = 128; -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct FractionalIndex(Arc>); diff --git a/crates/fuzz/fuzz/Cargo.lock b/crates/fuzz/fuzz/Cargo.lock index bbf0bb87..712b6b95 100644 --- a/crates/fuzz/fuzz/Cargo.lock +++ b/crates/fuzz/fuzz/Cargo.lock @@ -940,9 +940,9 @@ dependencies = [ [[package]] name = "serde_columnar" -version = "0.3.5" +version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06a86f5f6dc16d8308c37e145dd4c7e60fba1486d84982519388d31ea0ad6703" +checksum = "3748cbf2b43a15ee9627881cabd7820d50508781cdebd3bb54cea49215d367a1" dependencies = [ "itertools 0.11.0", "postcard", diff --git a/crates/loro-internal/src/encoding/arena.rs b/crates/loro-internal/src/encoding/arena.rs index b6dafd9d..bb3e303f 100644 --- a/crates/loro-internal/src/encoding/arena.rs +++ b/crates/loro-internal/src/encoding/arena.rs @@ -1,6 +1,6 @@ use std::{borrow::Cow, ops::Deref}; -use crate::{InternalString}; +use crate::InternalString; use fxhash::FxHashSet; use itertools::Itertools; use loro_common::{ContainerID, ContainerType, Counter, LoroError, LoroResult, PeerID, ID}; @@ -452,16 +452,17 @@ pub(super) struct PositionDelta<'a> { #[derive(Default)] #[columnar(ser, de)] -pub(super) struct PositionArena<'a> { +pub(crate) struct PositionArena<'a> { #[columnar(class = "vec", iter = "PositionDelta<'a>")] pub(super) positions: Vec>, } impl<'a> PositionArena<'a> { - pub fn from_positions(positions: Vec<&'a [u8]>) -> Self { - let mut ans = Vec::with_capacity(positions.len()); + pub fn from_positions(positions: impl IntoIterator) -> Self { + let iter = positions.into_iter(); + let mut ans = Vec::with_capacity(iter.size_hint().0); let mut last_bytes: &[u8] = &[]; - for p in positions { + for p in iter { let common = longest_common_prefix_length(last_bytes, p); let rest = &p[common..]; last_bytes = p; diff --git a/crates/loro-internal/src/encoding/encode_reordered.rs b/crates/loro-internal/src/encoding/encode_reordered.rs index 14ee8e51..0923d8d4 100644 --- a/crates/loro-internal/src/encoding/encode_reordered.rs +++ b/crates/loro-internal/src/encoding/encode_reordered.rs @@ -1380,7 +1380,7 @@ pub(crate) fn decode_op( Some(parent_id) }; - let fi = FractionalIndex::from_bytes(op.fractional_index); + let fi = FractionalIndex::from_bytes(positions[op.position_idx].clone()); let is_create = subject.id() == op_id; let ans = if is_create { TreeOp::Create { diff --git a/crates/loro-internal/src/encoding/value.rs b/crates/loro-internal/src/encoding/value.rs index 675e0921..b0a6309c 100644 --- a/crates/loro-internal/src/encoding/value.rs +++ b/crates/loro-internal/src/encoding/value.rs @@ -473,7 +473,7 @@ pub struct RawTreeMove { pub is_parent_null: bool, pub parent_peer_idx: usize, pub parent_cnt: Counter, - pub fractional_index: Vec, + pub position_idx: usize, } impl RawTreeMove { @@ -978,7 +978,7 @@ impl<'a> ValueReader<'a> { fn read_raw_tree_move(&mut self) -> LoroResult { let subject_peer_idx = self.read_usize()?; let subject_cnt = self.read_usize()?; - let fractional_index = self.read_binary_vec()?; + let position_idx = self.read_usize()?; let is_parent_null = self.read_u8()? != 0; let mut parent_peer_idx = 0; let mut parent_cnt = 0; @@ -990,7 +990,7 @@ impl<'a> ValueReader<'a> { Ok(RawTreeMove { subject_peer_idx, subject_cnt: subject_cnt as i32, - fractional_index, + position_idx, is_parent_null, parent_peer_idx, parent_cnt: parent_cnt as i32, @@ -1132,9 +1132,7 @@ impl ValueWriter { let len = self.buffer.len(); self.write_usize(op.subject_peer_idx); self.write_usize(op.subject_cnt as usize); - self.write_usize(op.fractional_index.len()); - self.buffer.extend_from_slice(&op.fractional_index); - + self.write_usize(op.position_idx); self.write_u8(op.is_parent_null as u8); if op.is_parent_null { return self.buffer.len() - len; diff --git a/crates/loro-internal/src/loro.rs b/crates/loro-internal/src/loro.rs index acf1e87d..4a9e8825 100644 --- a/crates/loro-internal/src/loro.rs +++ b/crates/loro-internal/src/loro.rs @@ -1157,7 +1157,7 @@ impl LoroDoc { /// /// Panic when it's not consistent pub fn check_state_diff_calc_consistency_slow(&self) { - #[cfg(any(test, debug_assertions))] + // #[cfg(any(test, debug_assertions))] { static IS_CHECKING: AtomicBool = AtomicBool::new(false); if IS_CHECKING.load(std::sync::atomic::Ordering::Acquire) { diff --git a/crates/loro-internal/src/oplog/change_store/block_encode.rs b/crates/loro-internal/src/oplog/change_store/block_encode.rs index fdb21e26..c7e087c7 100644 --- a/crates/loro-internal/src/oplog/change_store/block_encode.rs +++ b/crates/loro-internal/src/oplog/change_store/block_encode.rs @@ -38,6 +38,9 @@ //! ┌────────────────────┬─────────────────────────────────────────┐ //! │ Key Strings Size │ Key Strings │ //! └────────────────────┴─────────────────────────────────────────┘ +//! ┌────────────────────┬─────────────────────────────────────────┐ +//! │ Position Size │ Position │ +//! └────────────────────┴─────────────────────────────────────────┘ //! ┌────────┬──────────┬──────────┬───────┬───────────────────────┐ //! │ │ │ │ │ │ //! │ │ │ │ │ │ @@ -64,8 +67,11 @@ //! use std::borrow::Cow; +use std::collections::BTreeSet; use std::io::Write; +use fractional_index::FractionalIndex; +use fxhash::FxHashSet; use loro_common::{ ContainerID, Counter, HasCounterSpan, HasLamportSpan, InternalString, Lamport, LoroError, LoroResult, PeerID, TreeID, ID, @@ -79,7 +85,7 @@ use super::delta_rle_encode::{UnsignedDeltaDecoder, UnsignedDeltaEncoder}; use crate::arena::SharedArena; use crate::change::{Change, Timestamp}; use crate::container::tree::tree_op; -use crate::encoding::arena::ContainerArena; +use crate::encoding::arena::{ContainerArena, PositionArena}; use crate::encoding::value_register::ValueRegister; use crate::encoding::{ self, decode_op, encode_op, get_op_prop, EncodedDeleteStartId, IterableEncodedDeleteStartId, @@ -124,6 +130,8 @@ struct EncodedBlock<'a> { #[serde(borrow)] keys: Cow<'a, [u8]>, #[serde(borrow)] + positions: Cow<'a, [u8]>, + #[serde(borrow)] ops: Cow<'a, [u8]>, #[serde(borrow)] values: Cow<'a, [u8]>, @@ -183,8 +191,33 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec { peer_register, key_register: ValueRegister::new(), cid_register, + position_register: ValueRegister::new(), }; + { + // Init position register, making it ordered by fractional index + let mut position_set = BTreeSet::default(); + for c in block { + for op in c.ops().iter() { + if let crate::op::InnerContent::Tree(tree_op) = &op.content { + match tree_op { + tree_op::TreeOp::Create { position, .. } => { + position_set.insert(position.clone()); + } + tree_op::TreeOp::Move { position, .. } => { + position_set.insert(position.clone()); + } + tree_op::TreeOp::Delete { .. } => {} + } + } + } + } + + for position in position_set { + registers.position_register.register(&position); + } + } + let mut del_starts: Vec<_> = Vec::new(); let mut value_writer = ValueWriter::new(); for c in block { @@ -233,6 +266,13 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec { let keys = registers.key_register.unwrap_vec(); let keys_bytes = encode_keys(keys); + // ┌────────────────────┬─────────────────────────────────────────┐ + // │ Position Size │ Position │ + // └────────────────────┴─────────────────────────────────────────┘ + let position_vec = registers.position_register.unwrap_vec(); + let positions = PositionArena::from_positions(position_vec.iter().map(|p| p.as_bytes())); + let position_bytes = positions.encode(); + // ┌──────────┬──────────┬───────┬────────────────────────────────┐ // │ │ │ │ │ // │ │ │ │ │ @@ -275,6 +315,7 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec { commit_msgs: Cow::Owned(vec![]), cids: container_arena.encode().into(), keys: keys_bytes.into(), + positions: position_bytes.into(), ops: ops_bytes.into(), values: value_bytes.into(), }; @@ -307,6 +348,7 @@ struct Registers { peer_register: ValueRegister, key_register: ValueRegister, cid_register: ValueRegister, + position_register: ValueRegister, } use crate::encoding::value::{ @@ -335,8 +377,7 @@ impl ValueEncodeRegister for Registers { is_parent_null: parent.is_none(), parent_peer_idx: parent.map_or(0, |p| self.peer_register.register(&p.peer)), parent_cnt: parent.map_or(0, |p| p.counter), - // PERF: maybe we can use Bytes for position - fractional_index: position.as_bytes().into(), + position_idx: self.position_register.register(position), }), tree_op::TreeOp::Move { target, @@ -348,8 +389,7 @@ impl ValueEncodeRegister for Registers { is_parent_null: parent.is_none(), parent_peer_idx: parent.map_or(0, |p| self.peer_register.register(&p.peer)), parent_cnt: parent.map_or(0, |p| p.counter), - // PERF: maybe we can use Bytes for position - fractional_index: position.as_bytes().into(), + position_idx: self.position_register.register(position), }), tree_op::TreeOp::Delete { target } => { let parent = TreeID::delete_root(); @@ -359,7 +399,7 @@ impl ValueEncodeRegister for Registers { is_parent_null: false, parent_peer_idx: self.peer_register.register(&parent.peer), parent_cnt: parent.counter, - fractional_index: Vec::new(), + position_idx: 0, }) } } @@ -641,6 +681,7 @@ pub fn decode_block( keys, ops, values, + positions, .. } = doc; let mut changes = Vec::with_capacity(n_changes as usize); @@ -654,6 +695,8 @@ pub fn decode_block( peers: &header.peers, keys, }; + let positions = PositionArena::decode(&positions)?; + let positions = positions.parse_to_positions(); let cids: &Vec = header.cids.get_or_init(|| { ContainerArena::decode(&cids) .unwrap() @@ -701,7 +744,7 @@ pub fn decode_block( &mut del_iter, shared_arena, &decode_arena, - &[], + &positions, prop, ID::new(peer, counter), )?;