loro/crates/rle/src/rle_tree.rs

411 lines
14 KiB
Rust
Raw Normal View History

2022-10-11 08:50:22 +00:00
use std::{collections::HashMap, ptr::NonNull};
2022-09-06 14:22:54 +00:00
use self::node::{InternalNode, LeafNode, Node};
2022-09-01 17:14:39 +00:00
use crate::Rle;
2022-08-05 10:47:51 +00:00
pub(self) use bumpalo::collections::vec::Vec as BumpVec;
2022-08-10 10:42:36 +00:00
use bumpalo::Bump;
2022-09-09 15:31:49 +00:00
pub use cursor::{SafeCursor, SafeCursorMut, UnsafeCursor};
2022-10-12 17:24:47 +00:00
use fxhash::FxHashMap;
2022-10-03 09:35:44 +00:00
use num::FromPrimitive;
use ouroboros::self_referencing;
use smallvec::SmallVec;
2022-09-09 15:31:49 +00:00
pub use tree_trait::Position;
2022-08-05 10:47:51 +00:00
use tree_trait::RleTreeTrait;
2022-08-12 12:46:38 +00:00
2022-09-01 10:47:48 +00:00
mod cursor;
2022-09-06 14:22:54 +00:00
pub mod iter;
2022-08-12 12:46:38 +00:00
pub mod node;
2022-08-10 07:41:21 +00:00
#[cfg(test)]
mod test;
2022-08-12 12:46:38 +00:00
pub mod tree_trait;
2022-08-05 10:47:51 +00:00
#[self_referencing]
2022-09-01 16:59:02 +00:00
#[derive(Debug)]
2022-08-09 13:25:24 +00:00
pub struct RleTree<T: Rle + 'static, A: RleTreeTrait<T> + 'static> {
bump: Bump,
#[borrows(bump)]
2022-10-09 08:54:34 +00:00
node: &'this mut Node<'this, T, A>,
2022-08-05 12:04:49 +00:00
}
impl<T: Rle + 'static, A: RleTreeTrait<T> + 'static> Default for RleTree<T, A> {
fn default() -> Self {
RleTreeBuilder {
bump: Bump::new(),
2022-10-09 08:54:34 +00:00
node_builder: |bump| bump.alloc(Node::Internal(InternalNode::new(bump, None))),
}
.build()
}
}
2022-10-09 08:54:34 +00:00
impl<T: Rle, A: RleTreeTrait<T>> RleTree<T, A> {
/// Puts `value` in front of everything already stored in the tree.
///
/// When the root has a first leaf, the value is inserted before position 0 of that
/// leaf through a cursor. Otherwise it is inserted at index 0 via `insert_notify`.
/// `notify` is forwarded to whichever path is taken.
pub fn insert_at_first<F>(&mut self, value: T, notify: &mut F)
where
    F: FnMut(&T, *mut LeafNode<'_, T, A>),
{
    // `Some(value)` comes back only when there was no leaf to insert into.
    let pending = self.with_node_mut(|root| match root.get_first_leaf() {
        Some(first_leaf) => {
            // SAFETY: we have exclusive ref to the tree
            let cursor =
                unsafe { SafeCursorMut::new(first_leaf.into(), 0, 0, Position::Start, 0) };
            cursor.insert_before_notify(value, notify);
            None
        }
        None => Some(value),
    });

    if let Some(value) = pending {
        self.insert_notify(A::Int::from_u8(0).unwrap(), value, notify);
    }
}
2022-08-09 13:25:24 +00:00
/// Inserts `value` at `index` without observing leaf changes.
///
/// This is [`insert_notify`](Self::insert_notify) with a callback that does nothing.
///
/// # Panics
/// Panics under the same conditions as `insert_notify`.
#[inline]
pub fn insert(&mut self, index: A::Int, value: T) {
    self.insert_notify(index, value, &mut |_, _| {});
}
/// Inserts `value` at `index`.
///
/// `notify` is invoked when an element is inserted into, or moved to, a new leaf node.
///
/// # Panics
/// Panics if the root is not an internal node, or if the result of the root's
/// `insert` cannot be unwrapped.
#[inline]
pub fn insert_notify<F>(&mut self, index: A::Int, value: T, notify: &mut F)
where
    F: FnMut(&T, *mut LeafNode<'_, T, A>),
{
    self.with_node_mut(|root| {
        root.as_internal_mut()
            .expect("the root of an RleTree is an internal node")
            .insert(index, value, notify)
            .unwrap();
    })
}
2022-09-06 16:14:35 +00:00
/// return a cursor at the given index
2022-09-02 10:41:59 +00:00
#[inline]
2022-10-09 12:23:37 +00:00
pub fn get(&self, mut index: A::Int) -> Option<SafeCursor<'_, T, A>> {
2022-10-09 08:54:34 +00:00
self.with_node(|mut node| {
loop {
match node {
Node::Internal(internal_node) => {
let result = A::find_pos_internal(internal_node, index);
if !result.found {
return None;
}
node = &internal_node.children[result.child_index];
index = result.offset;
2022-09-06 16:14:35 +00:00
}
2022-10-09 08:54:34 +00:00
Node::Leaf(leaf) => {
let result = A::find_pos_leaf(leaf, index);
if !result.found {
return None;
}
// SAFETY: result is valid
return Some(unsafe {
std::mem::transmute(SafeCursor::new(
leaf.into(),
result.child_index,
result.offset,
result.pos,
0,
))
});
2022-09-06 16:14:35 +00:00
}
}
}
2022-10-09 08:54:34 +00:00
})
2022-09-06 16:14:35 +00:00
}
/// return the first valid cursor after the given index
2022-10-10 09:36:55 +00:00
/// reviewed by @Leeeon233
2022-09-06 16:14:35 +00:00
#[inline]
2022-10-09 12:23:37 +00:00
fn get_cursor_ge(&self, mut index: A::Int) -> Option<SafeCursor<'_, T, A>> {
2022-10-09 08:54:34 +00:00
self.with_node(|mut node| {
loop {
match node {
Node::Internal(internal_node) => {
let result = A::find_pos_internal(internal_node, index);
if result.child_index >= internal_node.children.len() {
return None;
}
node = &internal_node.children[result.child_index];
index = result.offset;
2022-09-06 16:14:35 +00:00
}
2022-10-09 08:54:34 +00:00
Node::Leaf(leaf) => {
let result = A::find_pos_leaf(leaf, index);
if result.child_index >= leaf.children.len() {
return None;
}
// SAFETY: result is valid
return Some(unsafe {
std::mem::transmute(SafeCursor::new(
leaf.into(),
result.child_index,
result.offset,
result.pos,
0,
))
});
2022-09-06 16:14:35 +00:00
}
2022-09-01 10:47:48 +00:00
}
}
2022-10-09 08:54:34 +00:00
})
2022-09-01 10:47:48 +00:00
}
#[inline]
2022-10-09 12:23:37 +00:00
pub fn get_mut(&mut self, index: A::Int) -> Option<SafeCursorMut<'_, T, A>> {
2022-09-02 10:41:59 +00:00
let cursor = self.get(index);
2022-10-14 11:43:20 +00:00
// SAFETY: this is safe because we have exclusive ref to the tree
cursor.map(|x| unsafe { SafeCursorMut::from(x.0) })
2022-08-05 10:47:51 +00:00
}
2022-10-03 09:35:44 +00:00
#[inline]
2022-10-09 12:23:37 +00:00
pub fn iter(&self) -> iter::Iter<'_, T, A> {
2022-10-09 08:54:34 +00:00
// SAFETY: the cursor and iter cannot outlive self
self.with_node(|node| unsafe {
2022-10-09 12:23:37 +00:00
iter::Iter::new(std::mem::transmute(node.get_first_leaf()))
2022-10-09 08:54:34 +00:00
})
2022-08-05 10:47:51 +00:00
}
2022-10-03 09:35:44 +00:00
#[inline]
2022-10-09 12:23:37 +00:00
pub fn iter_mut(&mut self) -> iter::IterMut<'_, T, A> {
2022-10-09 08:54:34 +00:00
// SAFETY: the cursor and iter cannot outlive self
self.with_node_mut(|node| unsafe {
2022-10-09 12:23:37 +00:00
iter::IterMut::new(std::mem::transmute(node.get_first_leaf_mut()))
2022-10-09 08:54:34 +00:00
})
2022-10-03 09:35:44 +00:00
}
/// Reports whether the length of the tree equals zero.
#[inline]
pub fn empty(&self) -> bool {
    let length = self.len();
    let zero = A::Int::from_usize(0).unwrap();
    length == zero
}
2022-10-09 08:54:34 +00:00
pub fn iter_mut_in(
&mut self,
2022-10-09 12:23:37 +00:00
start: Option<SafeCursor<'_, T, A>>,
end: Option<SafeCursor<'_, T, A>>,
) -> iter::IterMut<'_, T, A> {
if start.is_none() && end.is_none() {
2022-10-03 09:35:44 +00:00
self.iter_mut()
} else {
2022-10-09 08:54:34 +00:00
// SAFETY: the cursor cannot outlive self, so we are safe here
self.with_node_mut(|node| unsafe {
let leaf = node.get_first_leaf().unwrap().into();
// SAFETY: this is safe because we know there are at least one element in the tree
let start = start.unwrap_or_else(|| {
std::mem::transmute(SafeCursor::new(leaf, 0, 0, Position::Start, 0))
});
2022-10-14 11:43:20 +00:00
let start: SafeCursorMut<'_, T, A> = SafeCursorMut::from(start.0);
2022-10-09 12:23:37 +00:00
std::mem::transmute::<_, iter::IterMut<'_, T, A>>(iter::IterMut::from_cursor(
std::mem::transmute::<_, SafeCursorMut<'_, T, A>>(start),
end,
2022-10-09 08:54:34 +00:00
))
})
2022-10-03 09:35:44 +00:00
}
}
2022-08-11 12:12:47 +00:00
/// Deletes the range described by `start` and `end` without observing leaf changes.
///
/// This is [`delete_range_notify`](Self::delete_range_notify) with a callback that
/// does nothing.
///
/// # Panics
/// Panics under the same conditions as `delete_range_notify`.
pub fn delete_range(&mut self, start: Option<A::Int>, end: Option<A::Int>) {
    self.delete_range_notify(start, end, &mut |_, _| {});
}
2022-09-06 14:22:54 +00:00
/// Deletes the range described by `start` and `end`, forwarding `notify` to the
/// root node's `delete`.
///
/// Both bounds are passed through unchanged.
/// NOTE(review): a `None` bound presumably means "open on that side" — confirm
/// against `InternalNode::delete`.
///
/// # Panics
/// Panics if the root is not an internal node.
pub fn delete_range_notify<F>(
    &mut self,
    start: Option<A::Int>,
    end: Option<A::Int>,
    notify: &mut F,
) where
    F: FnMut(&T, *mut LeafNode<'_, T, A>),
{
    self.with_node_mut(|root| {
        root.as_internal_mut()
            .expect("the root of an RleTree is an internal node")
            .delete(start, end, notify);
    })
}
2022-10-10 09:36:55 +00:00
/// reviewed by @Leeeon233
2022-10-09 12:23:37 +00:00
pub fn iter_range(&self, start: A::Int, end: Option<A::Int>) -> iter::Iter<'_, T, A> {
2022-09-07 10:43:21 +00:00
let cursor_from = self.get_cursor_ge(start);
2022-09-06 16:14:35 +00:00
if cursor_from.is_none() {
return iter::Iter::new(None);
}
let cursor_from = cursor_from.unwrap();
2022-09-07 10:43:21 +00:00
if let Some(ans) = {
if let Some(end) = end {
let cursor_to = self.get_cursor_ge(end);
iter::Iter::from_cursor(cursor_from, cursor_to)
2022-09-06 16:14:35 +00:00
} else {
2022-09-07 10:43:21 +00:00
None
2022-09-06 14:22:54 +00:00
}
2022-09-07 10:43:21 +00:00
} {
ans
} else {
2022-10-12 17:24:47 +00:00
iter::Iter::from_cursor(cursor_from, None).unwrap_or_default()
2022-09-06 14:22:54 +00:00
}
2022-08-05 10:47:51 +00:00
}
2022-10-11 08:50:22 +00:00
pub fn update_at_cursors<U, F>(
&mut self,
cursors: &mut [UnsafeCursor<T, A>],
2022-10-11 08:50:22 +00:00
update_fn: &mut U,
notify: &mut F,
) where
U: FnMut(&mut T),
F: FnMut(&T, *mut LeafNode<T, A>),
{
2022-10-12 17:24:47 +00:00
let mut updates_map: HashMap<NonNull<_>, Vec<(usize, Vec<T>)>, _> = FxHashMap::default();
2022-10-11 08:50:22 +00:00
for cursor in cursors {
// SAFETY: we has the exclusive reference to the tree and the cursor is valid
let updates = unsafe {
cursor
.leaf
.as_ref()
.pure_update(cursor.index, cursor.offset, cursor.len, update_fn)
};
if let Some(update) = updates {
updates_map
.entry(cursor.leaf)
.or_default()
.push((cursor.index, update));
}
}
2022-10-15 15:45:03 +00:00
self.update_with_gathered_map(updates_map, notify);
}
// TODO: perf, use smallvec
pub fn update_at_cursors_with_args<U, F, Arg>(
2022-10-15 15:45:03 +00:00
&mut self,
cursor_groups: &[UnsafeCursor<T, A>],
args: &[Arg],
update_fn: &mut U,
2022-10-15 15:45:03 +00:00
notify: &mut F,
) where
U: FnMut(&mut T, &Arg),
2022-10-15 15:45:03 +00:00
F: FnMut(&T, *mut LeafNode<T, A>),
{
let mut cursor_map: HashMap<(NonNull<_>, usize), Vec<(&UnsafeCursor<T, A>, &Arg)>, _> =
FxHashMap::default();
for (i, arg) in args.iter().enumerate() {
let cursor = &cursor_groups[i];
cursor_map
.entry((cursor.leaf, cursor.index))
.or_default()
.push((cursor, arg));
}
2022-10-15 15:45:03 +00:00
let mut updates_map: HashMap<NonNull<_>, Vec<(usize, Vec<T>)>, _> = FxHashMap::default();
for ((mut leaf, index), args) in cursor_map.iter() {
// SAFETY: we has the exclusive reference to the tree and the cursor is valid
let leaf = unsafe { leaf.as_mut() };
let input_args = args.iter().map(|x| x.1).collect::<Vec<_>>();
let updates = leaf.pure_updates_at_same_index(
*index,
&args.iter().map(|x| x.0.offset).collect::<Vec<_>>(),
&args.iter().map(|x| x.0.len).collect::<Vec<_>>(),
&input_args,
update_fn,
);
2022-10-15 15:45:03 +00:00
if let Some(update) = updates {
updates_map
.entry(leaf.into())
.or_default()
.push((*index, update.into_iter().collect()));
2022-10-15 15:45:03 +00:00
}
}
self.update_with_gathered_map(updates_map, notify);
}
// TODO: perf, use smallvec
2022-10-15 15:45:03 +00:00
fn update_with_gathered_map<F, M>(
&mut self,
iter: HashMap<NonNull<LeafNode<T, A>>, Vec<(usize, Vec<T>)>, M>,
notify: &mut F,
) where
F: FnMut(&T, *mut LeafNode<T, A>),
{
2022-10-12 17:24:47 +00:00
let mut internal_updates_map: HashMap<NonNull<_>, Vec<(usize, Vec<_>)>, _> =
FxHashMap::default();
2022-10-15 15:45:03 +00:00
for (mut leaf, updates) in iter {
2022-10-11 08:50:22 +00:00
// SAFETY: we has the exclusive reference to the tree and the cursor is valid
let leaf = unsafe { leaf.as_mut() };
if let Err(new) = leaf.apply_updates(updates, notify) {
internal_updates_map
.entry(leaf.parent)
.or_default()
.push((leaf.get_index_in_parent().unwrap(), new));
} else {
// insert empty value to trigger cache update
2022-10-12 06:55:59 +00:00
internal_updates_map.entry(leaf.parent).or_default();
2022-10-11 08:50:22 +00:00
}
}
while !internal_updates_map.is_empty() {
let updates_map = std::mem::take(&mut internal_updates_map);
for (mut node, updates) in updates_map {
// SAFETY: we has the exclusive reference to the tree and the cursor is valid
let node = unsafe { node.as_mut() };
if let Err(new) = node.apply_updates(updates) {
internal_updates_map
.entry(node.parent.unwrap())
.or_default()
.push((node.get_index_in_parent().unwrap(), new));
} else if node.parent.is_some() {
2022-10-11 08:50:22 +00:00
// insert empty value to trigger cache update
2022-10-12 06:55:59 +00:00
internal_updates_map
.entry(node.parent.unwrap())
.or_default();
} else {
A::update_cache_internal(node);
2022-10-11 08:50:22 +00:00
}
}
}
}
pub fn update_range<U, F>(
2022-10-11 08:50:22 +00:00
&mut self,
start: A::Int,
end: Option<A::Int>,
update_fn: &mut U,
notify: &mut F,
) where
U: FnMut(&mut T),
F: FnMut(&T, *mut LeafNode<'_, T, A>),
{
let mut cursors = Vec::new();
for cursor in self.iter_range(start, end) {
cursors.push(cursor.0);
}
// SAFETY: it's perfectly safe here because we know what we are doing in the update_at_cursors
let mut cursors: Vec<_> = unsafe { std::mem::transmute(cursors) };
self.update_at_cursors(&mut cursors, update_fn, notify);
2022-10-11 08:50:22 +00:00
}
2022-08-12 12:46:38 +00:00
pub fn debug_check(&mut self) {
2022-10-09 08:54:34 +00:00
self.with_node_mut(|node| {
node.as_internal_mut().unwrap().check();
})
2022-08-05 10:47:51 +00:00
}
2022-10-10 13:47:57 +00:00
// pub fn iter_cursor_mut(&mut self) -> impl Iterator<Item = SafeCursorMut<'_, T, A>> {}
2022-08-05 10:47:51 +00:00
}
2022-08-05 12:04:49 +00:00
2022-10-09 08:54:34 +00:00
impl<T: Rle, A: RleTreeTrait<T>> RleTree<T, A> {
    /// Returns the length reported by the root node.
    #[inline]
    pub fn len(&self) -> A::Int {
        self.with_node(|root| root.len())
    }
}