perf: optimize how to store fractional index in change block

This commit is contained in:
Zixuan Chen 2024-08-10 15:40:56 +08:00
parent c8bab8f663
commit 0bac73b029
No known key found for this signature in database
9 changed files with 70 additions and 24 deletions

1
Cargo.lock generated
View file

@ -621,6 +621,7 @@ dependencies = [
"dev-utils",
"flate2",
"loro 0.16.2",
"rand",
"serde_json",
"tabled 0.15.0",
"tracing",

View file

@ -1,4 +1,4 @@
use dev_utils::get_mem_usage;
use dev_utils::{get_mem_usage, ByteSize};
use examples::sheet::init_large_sheet;
use loro::ID;
@ -59,4 +59,7 @@ pub fn main() {
doc.check_state_correctness_slow();
let after_checkout = get_mem_usage();
println!("Allocated bytes after checkout: {}", after_checkout);
let snapshot = doc.export_snapshot();
println!("Snapshot size: {}", ByteSize(snapshot.len()));
}

View file

@ -10,7 +10,7 @@ mod jitter;
const TERMINATOR: u8 = 128;
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)]
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct FractionalIndex(Arc<Vec<u8>>);

View file

@ -940,9 +940,9 @@ dependencies = [
[[package]]
name = "serde_columnar"
version = "0.3.5"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06a86f5f6dc16d8308c37e145dd4c7e60fba1486d84982519388d31ea0ad6703"
checksum = "3748cbf2b43a15ee9627881cabd7820d50508781cdebd3bb54cea49215d367a1"
dependencies = [
"itertools 0.11.0",
"postcard",

View file

@ -1,6 +1,6 @@
use std::{borrow::Cow, ops::Deref};
use crate::{InternalString};
use crate::InternalString;
use fxhash::FxHashSet;
use itertools::Itertools;
use loro_common::{ContainerID, ContainerType, Counter, LoroError, LoroResult, PeerID, ID};
@ -452,16 +452,17 @@ pub(super) struct PositionDelta<'a> {
#[derive(Default)]
#[columnar(ser, de)]
pub(super) struct PositionArena<'a> {
pub(crate) struct PositionArena<'a> {
#[columnar(class = "vec", iter = "PositionDelta<'a>")]
pub(super) positions: Vec<PositionDelta<'a>>,
}
impl<'a> PositionArena<'a> {
pub fn from_positions(positions: Vec<&'a [u8]>) -> Self {
let mut ans = Vec::with_capacity(positions.len());
pub fn from_positions(positions: impl IntoIterator<Item = &'a [u8]>) -> Self {
let iter = positions.into_iter();
let mut ans = Vec::with_capacity(iter.size_hint().0);
let mut last_bytes: &[u8] = &[];
for p in positions {
for p in iter {
let common = longest_common_prefix_length(last_bytes, p);
let rest = &p[common..];
last_bytes = p;

View file

@ -1380,7 +1380,7 @@ pub(crate) fn decode_op(
Some(parent_id)
};
let fi = FractionalIndex::from_bytes(op.fractional_index);
let fi = FractionalIndex::from_bytes(positions[op.position_idx].clone());
let is_create = subject.id() == op_id;
let ans = if is_create {
TreeOp::Create {

View file

@ -473,7 +473,7 @@ pub struct RawTreeMove {
pub is_parent_null: bool,
pub parent_peer_idx: usize,
pub parent_cnt: Counter,
pub fractional_index: Vec<u8>,
pub position_idx: usize,
}
impl RawTreeMove {
@ -978,7 +978,7 @@ impl<'a> ValueReader<'a> {
fn read_raw_tree_move(&mut self) -> LoroResult<RawTreeMove> {
let subject_peer_idx = self.read_usize()?;
let subject_cnt = self.read_usize()?;
let fractional_index = self.read_binary_vec()?;
let position_idx = self.read_usize()?;
let is_parent_null = self.read_u8()? != 0;
let mut parent_peer_idx = 0;
let mut parent_cnt = 0;
@ -990,7 +990,7 @@ impl<'a> ValueReader<'a> {
Ok(RawTreeMove {
subject_peer_idx,
subject_cnt: subject_cnt as i32,
fractional_index,
position_idx,
is_parent_null,
parent_peer_idx,
parent_cnt: parent_cnt as i32,
@ -1132,9 +1132,7 @@ impl ValueWriter {
let len = self.buffer.len();
self.write_usize(op.subject_peer_idx);
self.write_usize(op.subject_cnt as usize);
self.write_usize(op.fractional_index.len());
self.buffer.extend_from_slice(&op.fractional_index);
self.write_usize(op.position_idx);
self.write_u8(op.is_parent_null as u8);
if op.is_parent_null {
return self.buffer.len() - len;

View file

@ -1157,7 +1157,7 @@ impl LoroDoc {
///
/// Panics if the check finds an inconsistency.
pub fn check_state_diff_calc_consistency_slow(&self) {
#[cfg(any(test, debug_assertions))]
// #[cfg(any(test, debug_assertions))]
{
static IS_CHECKING: AtomicBool = AtomicBool::new(false);
if IS_CHECKING.load(std::sync::atomic::Ordering::Acquire) {

View file

@ -38,6 +38,9 @@
//! ┌────────────────────┬─────────────────────────────────────────┐
//! │ Key Strings Size │ Key Strings │
//! └────────────────────┴─────────────────────────────────────────┘
//! ┌────────────────────┬─────────────────────────────────────────┐
//! │ Position Size │ Position │
//! └────────────────────┴─────────────────────────────────────────┘
//! ┌────────┬──────────┬──────────┬───────┬───────────────────────┐
//! │ │ │ │ │ │
//! │ │ │ │ │ │
@ -64,8 +67,11 @@
//!
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::io::Write;
use fractional_index::FractionalIndex;
use fxhash::FxHashSet;
use loro_common::{
ContainerID, Counter, HasCounterSpan, HasLamportSpan, InternalString, Lamport, LoroError,
LoroResult, PeerID, TreeID, ID,
@ -79,7 +85,7 @@ use super::delta_rle_encode::{UnsignedDeltaDecoder, UnsignedDeltaEncoder};
use crate::arena::SharedArena;
use crate::change::{Change, Timestamp};
use crate::container::tree::tree_op;
use crate::encoding::arena::ContainerArena;
use crate::encoding::arena::{ContainerArena, PositionArena};
use crate::encoding::value_register::ValueRegister;
use crate::encoding::{
self, decode_op, encode_op, get_op_prop, EncodedDeleteStartId, IterableEncodedDeleteStartId,
@ -124,6 +130,8 @@ struct EncodedBlock<'a> {
#[serde(borrow)]
keys: Cow<'a, [u8]>,
#[serde(borrow)]
positions: Cow<'a, [u8]>,
#[serde(borrow)]
ops: Cow<'a, [u8]>,
#[serde(borrow)]
values: Cow<'a, [u8]>,
@ -183,8 +191,33 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec<u8> {
peer_register,
key_register: ValueRegister::new(),
cid_register,
position_register: ValueRegister::new(),
};
{
    // Pre-register every fractional index used by a tree Create/Move op in
    // this block. A BTreeSet both deduplicates the positions and yields them
    // in ascending order, so the indices handed out by the register follow
    // the fractional-index ordering.
    let mut sorted_positions = BTreeSet::default();
    for change in block {
        for op in change.ops().iter() {
            if let crate::op::InnerContent::Tree(tree) = &op.content {
                match tree {
                    tree_op::TreeOp::Create { position, .. }
                    | tree_op::TreeOp::Move { position, .. } => {
                        sorted_positions.insert(position.clone());
                    }
                    // Delete ops carry no position, so there is nothing to register.
                    tree_op::TreeOp::Delete { .. } => {}
                }
            }
        }
    }

    // Register in sorted order; the set is dropped at the end of this scope.
    for position in &sorted_positions {
        registers.position_register.register(position);
    }
}
let mut del_starts: Vec<_> = Vec::new();
let mut value_writer = ValueWriter::new();
for c in block {
@ -233,6 +266,13 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec<u8> {
let keys = registers.key_register.unwrap_vec();
let keys_bytes = encode_keys(keys);
// ┌────────────────────┬─────────────────────────────────────────┐
// │ Position Size │ Position │
// └────────────────────┴─────────────────────────────────────────┘
let position_vec = registers.position_register.unwrap_vec();
let positions = PositionArena::from_positions(position_vec.iter().map(|p| p.as_bytes()));
let position_bytes = positions.encode();
// ┌──────────┬──────────┬───────┬────────────────────────────────┐
// │ │ │ │ │
// │ │ │ │ │
@ -275,6 +315,7 @@ pub fn encode_block(block: &[Change], arena: &SharedArena) -> Vec<u8> {
commit_msgs: Cow::Owned(vec![]),
cids: container_arena.encode().into(),
keys: keys_bytes.into(),
positions: position_bytes.into(),
ops: ops_bytes.into(),
values: value_bytes.into(),
};
@ -307,6 +348,7 @@ struct Registers {
peer_register: ValueRegister<PeerID>,
key_register: ValueRegister<loro_common::InternalString>,
cid_register: ValueRegister<ContainerID>,
position_register: ValueRegister<FractionalIndex>,
}
use crate::encoding::value::{
@ -335,8 +377,7 @@ impl ValueEncodeRegister for Registers {
is_parent_null: parent.is_none(),
parent_peer_idx: parent.map_or(0, |p| self.peer_register.register(&p.peer)),
parent_cnt: parent.map_or(0, |p| p.counter),
// PERF: maybe we can use Bytes for position
fractional_index: position.as_bytes().into(),
position_idx: self.position_register.register(position),
}),
tree_op::TreeOp::Move {
target,
@ -348,8 +389,7 @@ impl ValueEncodeRegister for Registers {
is_parent_null: parent.is_none(),
parent_peer_idx: parent.map_or(0, |p| self.peer_register.register(&p.peer)),
parent_cnt: parent.map_or(0, |p| p.counter),
// PERF: maybe we can use Bytes for position
fractional_index: position.as_bytes().into(),
position_idx: self.position_register.register(position),
}),
tree_op::TreeOp::Delete { target } => {
let parent = TreeID::delete_root();
@ -359,7 +399,7 @@ impl ValueEncodeRegister for Registers {
is_parent_null: false,
parent_peer_idx: self.peer_register.register(&parent.peer),
parent_cnt: parent.counter,
fractional_index: Vec::new(),
position_idx: 0,
})
}
}
@ -641,6 +681,7 @@ pub fn decode_block(
keys,
ops,
values,
positions,
..
} = doc;
let mut changes = Vec::with_capacity(n_changes as usize);
@ -654,6 +695,8 @@ pub fn decode_block(
peers: &header.peers,
keys,
};
let positions = PositionArena::decode(&positions)?;
let positions = positions.parse_to_positions();
let cids: &Vec<ContainerID> = header.cids.get_or_init(|| {
ContainerArena::decode(&cids)
.unwrap()
@ -701,7 +744,7 @@ pub fn decode_block(
&mut del_iter,
shared_arena,
&decode_arena,
&[],
&positions,
prop,
ID::new(peer, counter),
)?;