index: build reachable change ids set lazily
Instead of abstracting RevWalk over borrowed/Arc-ed index types, I decided to implement bitset-based ancestor traversal. It's simpler and probably faster so long as the set isn't sparse.

"jj log" without working copy snapshot:

```
% hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \
    -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \
    "target/release-with-debug/{bin} -R ~/mirrors/linux \
     --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=\"\"'"
Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     271.3 ms ±   9.9 ms    [User: 183.8 ms, System: 87.7 ms]
  Range (min … max):   250.5 ms … 282.7 ms    20 runs

Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     177.5 ms ±  12.6 ms    [User: 94.6 ms, System: 82.9 ms]
  Range (min … max):   154.4 ms … 188.7 ms    20 runs

Relative speed comparison
       1.53 ±  0.12  target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
       1.00          target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""'
```

"jj status" with working copy snapshot (watchman enabled):

```
% hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \
    -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \
    "target/release-with-debug/{bin} -R ~/mirrors/linux \
     status --config-toml='revsets.short-prefixes=\"\"'"
Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     318.6 ms ±  12.6 ms    [User: 219.1 ms, System: 94.1 ms]
  Range (min … max):   294.2 ms … 333.0 ms    20 runs

Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
  Time (mean ± σ):     214.7 ms ±  15.0 ms    [User: 117.4 ms, System: 96.1 ms]
  Range (min … max):   198.4 ms … 243.3 ms    20 runs

Relative speed comparison
       1.48 ±  0.12  target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
       1.00          target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""'
```
parent adcb01ef95
commit a1b16c5583
2 changed files with 220 additions and 22 deletions
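For orientation before the diff (annotation, not part of the commit): both the removed eager code and the new `AncestorsBitSet` address a commit's `IndexPosition` as a single bit in a `Vec<u64>`, and the same word/bit arithmetic recurs throughout the patch. A minimal standalone sketch of that addressing; `bit_location` is a hypothetical helper name, not from the diff:

```rust
// Map an index position to (word, bit) in a Vec<u64>-backed bitset.
fn bit_location(pos: u32) -> (usize, u64) {
    let word = usize::try_from(pos / u64::BITS).unwrap(); // which u64 word
    let bit = 1_u64 << (pos % u64::BITS); // which bit within that word
    (word, bit)
}

fn main() {
    let mut bitset = vec![0_u64; 5]; // room for 257 positions: ceil(257/64) = 5 words
    let (word, bit) = bit_location(256);
    bitset[word] |= bit; // mark position 256 as a member
    assert!(bitset[word] & bit != 0); // membership test
    println!("position 256 lives in word {word}"); // word 4
}
```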
Changes to the composite index module:

```diff
@@ -17,7 +17,7 @@
 use std::cmp::{max, min, Ordering};
 use std::collections::{BTreeSet, BinaryHeap, HashSet};
 use std::iter;
-use std::sync::Arc;
+use std::sync::{Arc, Mutex};
 
 use itertools::Itertools;
 
```
```diff
@@ -26,7 +26,7 @@ use super::entry::{
     SmallLocalPositionsVec,
 };
 use super::readonly::ReadonlyIndexSegment;
-use super::rev_walk::RevWalk;
+use super::rev_walk::{AncestorsBitSet, RevWalk};
use super::revset_engine;
 use crate::backend::{ChangeId, CommitId};
 use crate::hex_util;
```
```diff
@@ -496,28 +496,19 @@ impl Index for CompositeIndex<'_> {
 
 pub(super) struct ChangeIdIndexImpl<I> {
     index: I,
-    reachable_bitset: Vec<u64>,
+    reachable_set: Mutex<AncestorsBitSet>,
 }
 
 impl<I: AsCompositeIndex> ChangeIdIndexImpl<I> {
     pub fn new(index: I, heads: &mut dyn Iterator<Item = &CommitId>) -> ChangeIdIndexImpl<I> {
-        // TODO: Calculate reachable bitset lazily.
         let composite = index.as_composite();
-        let bitset_len =
-            usize::try_from(u32::div_ceil(composite.num_commits(), u64::BITS)).unwrap();
-        let mut reachable_bitset = vec![0; bitset_len]; // request zeroed page
-        let head_positions = heads
-            .map(|id| composite.commit_id_to_pos(id).unwrap())
-            .collect_vec();
-        for entry in composite.walk_revs(&head_positions, &[]) {
-            let IndexPosition(pos) = entry.position();
-            let bitset_pos = pos / u64::BITS;
-            let bit = 1_u64 << (pos % u64::BITS);
-            reachable_bitset[usize::try_from(bitset_pos).unwrap()] |= bit;
+        let mut reachable_set = AncestorsBitSet::with_capacity(composite.num_commits());
+        for id in heads {
+            reachable_set.add_head(composite.commit_id_to_pos(id).unwrap());
         }
         ChangeIdIndexImpl {
             index,
-            reachable_bitset,
+            reachable_set: Mutex::new(reachable_set),
         }
     }
 }
```
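The effect of the hunk above (annotation): `new()` no longer runs `walk_revs` over all ancestors up front; it records one bit per head and defers the traversal cost until a lookup actually needs it. A sketch of the new control flow, assuming a `composite` index and in-range `head_pos`/`min_pos` variables (names are mine; the API is from this diff):

```rust
// Sketch only: construction is now cheap, traversal is paid on demand.
let mut reachable_set = AncestorsBitSet::with_capacity(composite.num_commits());
reachable_set.add_head(head_pos); // O(1): sets one bit, no ancestor walk
// ... later, on the first prefix lookup:
reachable_set.visit_until(composite, min_pos); // walks ancestors down to min_pos
let reachable = reachable_set.contains(min_pos); // cheap after the visit
```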
```diff
@@ -534,14 +525,12 @@ impl<I: AsCompositeIndex + Send + Sync> ChangeIdIndex for ChangeIdIndexImpl<I> {
         match index.resolve_change_id_prefix(prefix) {
             PrefixResolution::NoMatch => PrefixResolution::NoMatch,
             PrefixResolution::SingleMatch((_change_id, positions)) => {
+                debug_assert!(positions.iter().tuple_windows().all(|(a, b)| a < b));
+                let mut reachable_set = self.reachable_set.lock().unwrap();
+                reachable_set.visit_until(index, *positions.first().unwrap());
                 let reachable_commit_ids = positions
                     .iter()
-                    .filter(|IndexPosition(pos)| {
-                        let bitset_pos = pos / u64::BITS;
-                        let bit = 1_u64 << (pos % u64::BITS);
-                        let bits = self.reachable_bitset[usize::try_from(bitset_pos).unwrap()];
-                        bits & bit != 0
-                    })
+                    .filter(|&&pos| reachable_set.contains(pos))
                     .map(|&pos| index.entry_by_pos(pos).commit_id())
                     .collect_vec();
                 if reachable_commit_ids.is_empty() {
```
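Why visiting down to `positions.first()` suffices (my gloss, not from the commit): the positions come back sorted ascending, which the new `debug_assert!` documents, so the first element is the minimum, and `visit_until` guarantees membership answers for every position at or above the one it visits down to. A toy illustration with hypothetical position values:

```rust
// Toy illustration; the IndexPosition values here are made up.
fn main() {
    #[derive(Clone, Copy, PartialEq, PartialOrd)]
    struct IndexPosition(u32); // stand-in for the index type of the same name

    let positions = [IndexPosition(5), IndexPosition(42), IndexPosition(100)];
    let min_pos = *positions.first().unwrap(); // sorted ascending, so first == minimum
    // Visiting ancestors down to min_pos covers every subsequent contains() call:
    assert!(positions.iter().all(|&pos| pos >= min_pos));
}
```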
Changes to the rev walk module:

```diff
@@ -503,6 +503,78 @@ impl<'a> Iterator for RevWalkDescendants<'a> {
 
 impl FusedIterator for RevWalkDescendants<'_> {}
 
+/// Computes the ancestors set lazily.
+///
+/// This is similar to `RevWalk` functionality-wise, but implemented with
+/// different design goals:
+///
+/// * lazy updates with no lifetimed fields
+/// * optimized for dense ancestors set
+/// * optimized for testing set membership
+/// * no iterator API (which could be implemented on top)
+#[derive(Clone, Debug)]
+pub(super) struct AncestorsBitSet {
+    bitset: Vec<u64>,
+    last_visited_bitset_pos: u32,
+}
+
+impl AncestorsBitSet {
+    /// Creates a bit set of the specified capacity.
+    pub fn with_capacity(len: u32) -> Self {
+        let bitset_len = usize::try_from(u32::div_ceil(len, u64::BITS)).unwrap();
+        AncestorsBitSet {
+            bitset: vec![0; bitset_len], // request zeroed page
+            last_visited_bitset_pos: 0,
+        }
+    }
+
+    /// Adds head `pos` to the set.
+    ///
+    /// Panics if `pos` exceeds the capacity.
+    pub fn add_head(&mut self, pos: IndexPosition) {
+        let bitset_pos = pos.0 / u64::BITS;
+        let bit = 1_u64 << (pos.0 % u64::BITS);
+        self.bitset[usize::try_from(bitset_pos).unwrap()] |= bit;
+        self.last_visited_bitset_pos = max(self.last_visited_bitset_pos, bitset_pos + 1);
+    }
+
+    /// Returns `true` if the given `pos` is an ancestor of the heads.
+    ///
+    /// Panics if `pos` exceeds the capacity or has not been visited yet.
+    pub fn contains(&self, pos: IndexPosition) -> bool {
+        let bitset_pos = pos.0 / u64::BITS;
+        let bit = 1_u64 << (pos.0 % u64::BITS);
+        assert!(bitset_pos >= self.last_visited_bitset_pos);
+        self.bitset[usize::try_from(bitset_pos).unwrap()] & bit != 0
+    }
+
+    /// Updates the set by visiting ancestors until the given `to_visit_pos`.
+    pub fn visit_until(&mut self, index: CompositeIndex, to_visit_pos: IndexPosition) {
+        let to_visit_bitset_pos = to_visit_pos.0 / u64::BITS;
+        if to_visit_bitset_pos >= self.last_visited_bitset_pos {
+            return;
+        }
+        for visiting_bitset_pos in (to_visit_bitset_pos..self.last_visited_bitset_pos).rev() {
+            let mut unvisited_bits = self.bitset[usize::try_from(visiting_bitset_pos).unwrap()];
+            while unvisited_bits != 0 {
+                let bit_pos = u64::BITS - unvisited_bits.leading_zeros() - 1; // from MSB
+                unvisited_bits ^= 1_u64 << bit_pos;
+                let current_pos = IndexPosition(visiting_bitset_pos * u64::BITS + bit_pos);
+                for parent_pos in index.entry_by_pos(current_pos).parent_positions() {
+                    assert!(parent_pos < current_pos);
+                    let parent_bitset_pos = parent_pos.0 / u64::BITS;
+                    let bit = 1_u64 << (parent_pos.0 % u64::BITS);
+                    self.bitset[usize::try_from(parent_bitset_pos).unwrap()] |= bit;
+                    if visiting_bitset_pos == parent_bitset_pos {
+                        unvisited_bits |= bit;
+                    }
+                }
+            }
+        }
+        self.last_visited_bitset_pos = to_visit_bitset_pos;
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use itertools::Itertools as _;
```
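One detail worth calling out in `visit_until` (annotation): within each 64-bit word it scans set bits from the most significant bit downward, and since a parent always has a lower position than its child, a parent landing in the *same* word is OR'd back into `unvisited_bits` and handled in the same pass, while parents in lower words wait for a later iteration of the outer loop. A standalone demo of that MSB-first scan, with a made-up bit pattern:

```rust
// Demo of the MSB-first bit scan used by visit_until (input is made up).
fn main() {
    let mut unvisited_bits: u64 = 0b1001_0100; // bits 7, 4, and 2 set
    while unvisited_bits != 0 {
        let bit_pos = u64::BITS - unvisited_bits.leading_zeros() - 1; // highest set bit
        unvisited_bits ^= 1_u64 << bit_pos; // clear it; visit_until may also add lower bits here
        println!("visiting bit {bit_pos}"); // prints 7, then 4, then 2
    }
}
```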
```diff
@@ -512,6 +584,12 @@ mod tests {
     use super::*;
     use crate::backend::{ChangeId, CommitId};
 
+    /// Generator of unique 16-byte CommitId excluding root id
+    fn commit_id_generator() -> impl FnMut() -> CommitId {
+        let mut iter = (1_u128..).map(|n| CommitId::new(n.to_le_bytes().into()));
+        move || iter.next().unwrap()
+    }
+
     /// Generator of unique 16-byte ChangeId excluding root id
     fn change_id_generator() -> impl FnMut() -> ChangeId {
         let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into()));
```
```diff
@@ -864,4 +942,135 @@ mod tests {
             [&id_2, &id_4, &id_7].map(Clone::clone)
         );
     }
+
+    #[test]
+    fn test_ancestors_bit_set() {
+        let mut new_commit_id = commit_id_generator();
+        let mut new_change_id = change_id_generator();
+        let mut mutable_index = DefaultMutableIndex::full(16, 16);
+
+        // F        F = 256
+        // |\       E = 193,194,195,..,254
+        // E | D    D = 192,255
+        // | |/     C = 66,68,70,..,190
+        // B C      B = 65,67,69,..,189,191
+        // |/       A = 0,1,2,..,64
+        // A
+        let id_a0 = new_commit_id();
+        mutable_index.add_commit_data(id_a0.clone(), new_change_id(), &[]);
+        let id_a64 = (1..=64).fold(id_a0.clone(), |parent_id, i| {
+            assert_eq!(mutable_index.as_composite().num_commits(), i);
+            let id = new_commit_id();
+            mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]);
+            id
+        });
+        let (id_b189, id_c190) = (65..=190).step_by(2).fold(
+            (id_a64.clone(), id_a64.clone()),
+            |(parent_id_b, parent_id_c), i| {
+                assert_eq!(mutable_index.as_composite().num_commits(), i);
+                let id_b = new_commit_id();
+                let id_c = new_commit_id();
+                mutable_index.add_commit_data(id_b.clone(), new_change_id(), &[parent_id_b]);
+                mutable_index.add_commit_data(id_c.clone(), new_change_id(), &[parent_id_c]);
+                (id_b, id_c)
+            },
+        );
+        let id_b191 = new_commit_id();
+        mutable_index.add_commit_data(id_b191.clone(), new_change_id(), &[id_b189]);
+        let id_d192 = new_commit_id();
+        mutable_index.add_commit_data(id_d192.clone(), new_change_id(), &[id_c190.clone()]);
+        let id_e254 = (193..=254).fold(id_b191.clone(), |parent_id, i| {
+            assert_eq!(mutable_index.as_composite().num_commits(), i);
+            let id = new_commit_id();
+            mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]);
+            id
+        });
+        let id_d255 = new_commit_id();
+        mutable_index.add_commit_data(id_d255.clone(), new_change_id(), &[id_d192.clone()]);
+        let id_f256 = new_commit_id();
+        mutable_index.add_commit_data(
+            id_f256.clone(),
+            new_change_id(),
+            &[id_c190.clone(), id_e254.clone()],
+        );
+        assert_eq!(mutable_index.as_composite().num_commits(), 257);
+
+        let index = mutable_index.as_composite();
+        let to_pos = |id: &CommitId| index.commit_id_to_pos(id).unwrap();
+        let new_ancestors_set = |heads: &[&CommitId]| {
+            let mut set = AncestorsBitSet::with_capacity(index.num_commits());
+            for &id in heads {
+                set.add_head(to_pos(id));
+            }
+            set
+        };
+
+        // Nothing reachable
+        let set = new_ancestors_set(&[]);
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        for pos in (0..=256).map(IndexPosition) {
+            assert!(!set.contains(pos), "{pos:?} should be unreachable");
+        }
+
+        // All reachable
+        let mut set = new_ancestors_set(&[&id_f256, &id_d255]);
+        assert_eq!(set.last_visited_bitset_pos, 5);
+        set.visit_until(index, to_pos(&id_f256));
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_d192));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(set.contains(to_pos(&id_e254)));
+        assert!(set.contains(to_pos(&id_d255)));
+        assert!(set.contains(to_pos(&id_d192)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        set.visit_until(index, to_pos(&id_f256)); // should be noop
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        for pos in (0..=256).map(IndexPosition) {
+            assert!(set.contains(pos), "{pos:?} should be reachable");
+        }
+
+        // A, B, C, E, F are reachable
+        let mut set = new_ancestors_set(&[&id_f256]);
+        assert_eq!(set.last_visited_bitset_pos, 5);
+        set.visit_until(index, to_pos(&id_f256));
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_d192));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(!set.contains(to_pos(&id_d255)));
+        assert!(!set.contains(to_pos(&id_d192)));
+        set.visit_until(index, to_pos(&id_c190));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(set.contains(to_pos(&id_c190)));
+        set.visit_until(index, to_pos(&id_a64));
+        assert_eq!(set.last_visited_bitset_pos, 1);
+        assert!(set.contains(to_pos(&id_b191)));
+        assert!(set.contains(to_pos(&id_a64)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        assert!(set.contains(to_pos(&id_a0)));
+
+        // A, C, D are reachable
+        let mut set = new_ancestors_set(&[&id_d255]);
+        assert_eq!(set.last_visited_bitset_pos, 4);
+        assert!(!set.contains(to_pos(&id_f256)));
+        set.visit_until(index, to_pos(&id_e254));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(!set.contains(to_pos(&id_e254)));
+        set.visit_until(index, to_pos(&id_d255));
+        assert_eq!(set.last_visited_bitset_pos, 3);
+        assert!(set.contains(to_pos(&id_d255)));
+        set.visit_until(index, to_pos(&id_b191));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(!set.contains(to_pos(&id_b191)));
+        set.visit_until(index, to_pos(&id_c190));
+        assert_eq!(set.last_visited_bitset_pos, 2);
+        assert!(set.contains(to_pos(&id_c190)));
+        set.visit_until(index, to_pos(&id_a0));
+        assert_eq!(set.last_visited_bitset_pos, 0);
+        assert!(set.contains(to_pos(&id_a64)));
+        assert!(set.contains(to_pos(&id_a0)));
+    }
 }
```
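A note on the asserted `last_visited_bitset_pos` values (my reading, not stated in the commit): the test graph is sized so that each named commit lands in a predictable 64-bit word of the bitset (word index = position / 64), and the assertions track exactly which words have been swept so far. A small check of that layout:

```rust
// Word index = position / 64; the layout is inferred from the test above.
fn word(pos: u32) -> u32 {
    pos / u64::BITS // 64 positions per bitset word
}

fn main() {
    assert_eq!(word(256), 4); // F256: add_head() pushes last_visited_bitset_pos to 5
    assert_eq!(word(255), 3); // D255 shares word 3 with D192 and E193..=254
    assert_eq!(word(64), 1); // A64 sits at the first bit of word 1
    assert_eq!(word(0), 0); // visiting down to A0 sweeps every word
    println!("layout checks out");
}
```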