feat: rle

This commit is contained in:
Zixuan Chen 2022-07-13 00:47:41 +08:00
commit 2c7e2de763
12 changed files with 295 additions and 0 deletions

2
.editorconfig Normal file
View file

@ -0,0 +1,2 @@
[*.rs]
indent_size = 4

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/target
/Cargo.lock

2
Cargo.toml Normal file
View file

@ -0,0 +1,2 @@
[workspace]
members = ["crates/*"]

0
README.md Normal file
View file

View file

@ -0,0 +1,9 @@
[package]
name = "loro-core"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
string_cache = "0.8.3"

View file

@ -0,0 +1,7 @@
/// Alias for the integer type used as a client identifier.
pub type ClientID = u64;

/// An identifier made of a client id and a 32-bit counter.
///
/// The derived ordering compares `client_id` first and `counter` second,
/// following the field declaration order.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ID {
    /// Client part of the identifier.
    client_id: ClientID,
    /// Counter part of the identifier.
    counter: u32,
}

View file

@ -0,0 +1,12 @@
// Silences dead-code and unused-import warnings for the whole crate.
// NOTE(review): presumably temporary while the crate is still a skeleton; confirm and
// remove once the modules below are actually used.
#![allow(dead_code, unused_imports)]

mod id;
mod store;

#[cfg(test)]
mod tests {
    /// Placeholder test that only checks integer addition.
    #[test]
    fn it_works() {
        let sum = 2 + 2;
        assert_eq!(sum, 4);
    }
}

View file

@ -0,0 +1,3 @@
/// Generic container that wraps a `Vec<T>`.
///
/// NOTE(review): the name suggests run-length-encoded storage, but no encoding logic
/// is attached to this type here.
struct RleArray<T> {
    /// Backing storage.
    data: Vec<T>,
}

View file

@ -0,0 +1,11 @@
use std::collections::HashMap;
use string_cache::{Atom, DefaultAtom, EmptyStaticAtomSet};
use crate::id::ClientID;
/// Placeholder for a change record; it has no fields yet.
///
/// `#[non_exhaustive]` keeps code outside this crate from constructing or exhaustively
/// matching the struct, so fields can be added later.
#[non_exhaustive]
struct Change {}

/// Maps each `ClientID` to a `Vec<Change>`.
struct Store {
    /// Changes grouped by client id.
    map: HashMap<ClientID, Vec<Change>>,
}

8
crates/rle/Cargo.toml Normal file
View file

@ -0,0 +1,8 @@
[package]
name = "rle"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

2
crates/rle/src/lib.rs Normal file
View file

@ -0,0 +1,2 @@
// Implementation module; the crate's public API is the re-export list below.
mod rle;
// `Slice` and `SliceIterator` are re-exported too: `RleVec::slice` returns a
// `SliceIterator` whose items are `Slice` values, so callers must be able to name both.
pub use rle::{HasLength, Mergable, RleVec, SearchResult, Slice, SliceIterator, Sliceable};

237
crates/rle/src/rle.rs Normal file
View file

@ -0,0 +1,237 @@
/// A vector whose elements are compressed with run-length encoding.
///
/// Neighbouring `T` values may be merged into one stored element: when a new value is
/// pushed, it is merged into the last stored element if that element allows it. Every
/// stored element therefore has a length, which gives two kinds of index:
///
/// 1. *merged index*: the position of a (possibly merged) element in the backing vector;
/// 2. *atom index*: the position of an individual atom, counted across all elements.
///
/// `RleVec` uses atom indexes by default:
/// - `len()` returns the number of atoms in the vector;
/// - `get(index)` returns the stored element that contains the atom at `index`;
/// - `slice(from, to)` iterates over the atoms in `from..to`.
#[derive(Debug, Clone)]
pub struct RleVec<T> {
    /// Stored (possibly merged) elements.
    vec: Vec<T>,
    /// Total number of atoms, i.e. the sum of the lengths of all pushed values.
    _len: usize,
    /// Atom offsets: `index[i]` is the atom index at which `vec[i]` starts, followed by
    /// one trailing entry equal to `_len`. Empty while `vec` is empty.
    index: Vec<usize>,
}
/// Values that can absorb a neighbouring value of the same type.
pub trait Mergable {
    /// Returns `true` when `other` may be merged into `self`.
    fn is_mergable(&self, other: &Self) -> bool;

    /// Merges `other` into `self`.
    ///
    /// `RleVec::push` only calls this after `is_mergable` returned `true`.
    fn merge(&mut self, other: &Self);
}
/// Values from which a sub-range can be extracted as a new value of the same type.
pub trait Sliceable {
    /// Returns a new value built from the part of `self` between `start` and `end`.
    ///
    /// NOTE(review): the trait does not state whether `end` is exclusive; implementors
    /// should confirm the intended convention.
    fn slice(&self, start: usize, end: usize) -> Self;
}
/// Values that have a length.
pub trait HasLength {
    /// Returns the length of the value.
    fn len(&self) -> usize;

    /// Returns `true` when `len()` is zero. Provided by default; implementors only
    /// need to supply `len`.
    fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
/// Result of looking up an atom index in an `RleVec`.
///
/// The fields are public so that code outside this module can read the result.
#[derive(Debug, Clone, Copy)]
pub struct SearchResult<'a, T> {
    /// The stored (possibly merged) element that contains the requested atom.
    pub element: &'a T,
    /// Position of `element` in the backing vector.
    pub merged_index: usize,
    /// Position of the requested atom inside `element`.
    pub offset: usize,
}
impl<T: Mergable + Sliceable + HasLength> RleVec<T> {
    /// Appends `value` to the end of the vector.
    ///
    /// If the last stored element reports `is_mergable(&value)`, `value` is merged into
    /// it instead of being stored as a new element. Either way the atom length grows by
    /// `value.len()`.
    pub fn push(&mut self, value: T) {
        self._len += value.len();
        if let Some(last) = self.vec.last_mut() {
            if last.is_mergable(&value) {
                last.merge(&value);
                // The trailing `index` entry always mirrors the total atom length.
                *self
                    .index
                    .last_mut()
                    .expect("`index` is non-empty whenever `vec` is non-empty") = self._len;
                return;
            }
        } else {
            // First element: record its start offset; its end offset is pushed below.
            self.index.push(0);
        }
        self.vec.push(value);
        self.index.push(self._len);
    }

    /// Returns `true` when no element is stored.
    pub fn is_empty(&self) -> bool {
        self.vec.is_empty()
    }

    /// Returns the number of atoms, i.e. the sum of the lengths of all pushed values.
    pub fn len(&self) -> usize {
        self._len
    }

    /// Looks up the atom at atom index `index`.
    ///
    /// The result holds the stored element containing that atom, the element's merged
    /// index and the atom's offset inside the element.
    ///
    /// # Panics
    ///
    /// Panics if `index >= self.len()` (which includes every lookup on an empty vector).
    pub fn get(&self, index: usize) -> SearchResult<'_, T> {
        assert!(
            index < self._len,
            "atom index out of bounds: the len is {} but the index is {}",
            self._len,
            index
        );
        let (merged_index, offset) = self.locate(index);
        SearchResult {
            element: &self.vec[merged_index],
            merged_index,
            offset,
        }
    }

    /// Returns an iterator over the runs that cover the atoms in `from..to`
    /// (`to` is exclusive and may be equal to `self.len()`).
    ///
    /// # Panics
    ///
    /// Panics if `from > to` or `to > self.len()`.
    pub fn slice(&self, from: usize, to: usize) -> SliceIterator<'_, T> {
        assert!(
            from <= to,
            "slice start {} is greater than slice end {}",
            from,
            to
        );
        assert!(
            to <= self._len,
            "slice end {} is out of bounds for atom length {}",
            to,
            self._len
        );
        let (cur_index, cur_offset) = self.locate(from);
        let (end_index, end_offset) = self.locate(to);
        SliceIterator {
            vec: &self.vec,
            cur_index,
            cur_offset,
            end_index,
            end_offset,
        }
    }

    /// Maps an atom position `index <= self.len()` to `(merged_index, offset)`.
    ///
    /// The one-past-the-end position maps to `(self.vec.len(), 0)`, which is the end
    /// marker the slice iterator stops at without touching the backing vector.
    fn locate(&self, index: usize) -> (usize, usize) {
        if index == self._len {
            return (self.vec.len(), 0);
        }
        // `index[..vec.len()]` holds the sorted start offsets of the stored elements and
        // begins with 0, so at least one start is `<= index`; the last such start belongs
        // to the element containing the atom.
        let starts = &self.index[..self.vec.len()];
        let merged_index = starts.partition_point(|&start| start <= index) - 1;
        (merged_index, index - starts[merged_index])
    }
}
impl<T> RleVec<T> {
pub fn new() -> Self {
RleVec {
vec: Vec::new(),
_len: 0,
index: Vec::new(),
}
}
}
impl<T> Default for RleVec<T> {
fn default() -> Self {
Self::new()
}
}
/// Iterator state for walking the runs between two positions of an `RleVec`.
///
/// A position is a pair of an element index and an offset inside that element.
#[derive(Debug, Clone)]
pub struct SliceIterator<'a, T> {
    /// The stored elements being walked. Borrowed as a slice; a `&Vec<T>` coerces to it.
    vec: &'a [T],
    /// Index of the element the iterator is currently at.
    cur_index: usize,
    /// Offset inside the current element at which the next yielded run starts.
    cur_offset: usize,
    /// Index of the element in which iteration stops.
    end_index: usize,
    /// Offset inside the end element at which iteration stops (exclusive).
    end_offset: usize,
}
/// One run yielded by `SliceIterator`: a stored element plus the part of it that is
/// covered.
///
/// The fields are public so that code outside this module can read the yielded items.
#[derive(Debug, Clone, Copy)]
pub struct Slice<'a, T> {
    /// The stored element this run refers to.
    pub value: &'a T,
    /// Offset inside `value` at which the covered part starts.
    pub start: usize,
    /// Offset inside `value` at which the covered part ends (exclusive).
    pub end: usize,
}
impl<'a, T: HasLength> Iterator for SliceIterator<'a, T> {
    type Item = Slice<'a, T>;

    /// Yields the next run between the current position and the end position.
    ///
    /// Every element before the end element is yielded from the current offset up to
    /// its full length; the end element is yielded only up to `end_offset`. Once the
    /// current position equals the end position the iterator returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        let in_end_element = self.cur_index == self.end_index;
        // Checked before indexing: the end position may not refer to a stored element.
        if in_end_element && self.cur_offset == self.end_offset {
            return None;
        }

        let value = &self.vec[self.cur_index];
        let start = self.cur_offset;
        let end = if in_end_element {
            // Final, partial run: park the cursor on the end position.
            self.cur_offset = self.end_offset;
            self.end_offset
        } else {
            // Run to the end of this element, then move to the start of the next one.
            self.cur_index += 1;
            self.cur_offset = 0;
            value.len()
        };
        Some(Slice { value, start, end })
    }
}
#[cfg(test)]
mod test {
    mod prime_value {
        use crate::{HasLength, Mergable, RleVec, Sliceable};

        // `String` serves as the element type: its length is its byte length, and a
        // string accepts a merge for as long as it holds fewer than 8 bytes.
        impl HasLength for String {
            fn len(&self) -> usize {
                String::len(self)
            }
        }

        impl Mergable for String {
            fn is_mergable(&self, _: &Self) -> bool {
                self.len() < 8
            }

            fn merge(&mut self, other: &Self) {
                self.push_str(other);
            }
        }

        impl Sliceable for String {
            fn slice(&self, start: usize, end: usize) -> Self {
                self[start..end].to_string()
            }
        }

        /// Builds an `RleVec<String>` by pushing `parts` in order.
        fn rle_of(parts: &[&str]) -> RleVec<String> {
            let mut vec: RleVec<String> = RleVec::new();
            for &part in parts {
                vec.push(part.to_string());
            }
            vec
        }

        #[test]
        fn get_at_atom_index() {
            // "1234" and "5678" merge into one element; the third push starts a new one.
            let vec = rle_of(&["1234", "5678", "12345678"]);

            let inside_first = vec.get(4);
            assert_eq!(inside_first.element, "12345678");
            assert_eq!(inside_first.merged_index, 0);
            assert_eq!(inside_first.offset, 4);

            let start_of_second = vec.get(8);
            assert_eq!(start_of_second.element, "12345678");
            assert_eq!(start_of_second.merged_index, 1);
            assert_eq!(start_of_second.offset, 0);
        }

        #[test]
        fn slice() {
            // The first three pushes merge into "12345678"; the last push is stored separately.
            let vec = rle_of(&["1234", "56", "78", "12345678"]);
            let mut iter = vec.slice(4, 12);

            let first = iter.next().unwrap();
            assert_eq!(first.value, "12345678");
            assert_eq!(first.start, 4);
            assert_eq!(first.end, 8);

            let second = iter.next().unwrap();
            assert_eq!(second.value, "12345678");
            assert_eq!(second.start, 0);
            assert_eq!(second.end, 4);
        }
    }
}