From a1b16c5583281d476a01a4d3d7d47f15e7058bdc Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 14 Feb 2024 15:24:42 +0900 Subject: [PATCH] index: build reachable change ids set lazily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of abstracting RevWalk over borrowed/Arc-ed index types, I decided to implement bitset-based ancestor traversal. It's simpler and probably faster so long as the set isn't sparse. "jj log" without working copy snapshot: ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux \ --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=\"\"'" Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 271.3 ms ± 9.9 ms [User: 183.8 ms, System: 87.7 ms] Range (min … max): 250.5 ms … 282.7 ms 20 runs Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 177.5 ms ± 12.6 ms [User: 94.6 ms, System: 82.9 ms] Range (min … max): 154.4 ms … 188.7 ms 20 runs Relative speed comparison 1.53 ± 0.12 target/release-with-debug/jj-1 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' 1.00 target/release-with-debug/jj-2 -R ~/mirrors/linux --ignore-working-copy log -r.. -l100 --config-toml='revsets.short-prefixes=""' ``` "jj status" with working copy snapshot (watchman enabled): ``` % hyperfine --sort command --warmup 3 --runs 20 -L bin jj-0,jj-1,jj-2 \ -s "target/release-with-debug/{bin} -R ~/mirrors/linux debug reindex" \ "target/release-with-debug/{bin} -R ~/mirrors/linux \ status --config-toml='revsets.short-prefixes=\"\"'" Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 318.6 ms ± 12.6 ms [User: 219.1 ms, System: 94.1 ms] Range (min … max): 294.2 ms … 333.0 ms 20 runs Benchmark 3: target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' Time (mean ± σ): 214.7 ms ± 15.0 ms [User: 117.4 ms, System: 96.1 ms] Range (min … max): 198.4 ms … 243.3 ms 20 runs Relative speed comparison 1.48 ± 0.12 target/release-with-debug/jj-1 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' 1.00 target/release-with-debug/jj-2 -R ~/mirrors/linux status --config-toml='revsets.short-prefixes=""' ``` --- lib/src/default_index/composite.rs | 33 ++--- lib/src/default_index/rev_walk.rs | 209 +++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+), 22 deletions(-) diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index 4cc773ee3..eda60436e 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -17,7 +17,7 @@ use std::cmp::{max, min, Ordering}; use std::collections::{BTreeSet, BinaryHeap, HashSet}; use std::iter; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use itertools::Itertools; @@ -26,7 +26,7 @@ use super::entry::{ SmallLocalPositionsVec, }; use super::readonly::ReadonlyIndexSegment; -use super::rev_walk::RevWalk; +use super::rev_walk::{AncestorsBitSet, RevWalk}; use super::revset_engine; use crate::backend::{ChangeId, CommitId}; use crate::hex_util; @@ -496,28 +496,19 @@ impl Index for CompositeIndex<'_> { pub(super) struct ChangeIdIndexImpl { index: I, - reachable_bitset: Vec, + reachable_set: Mutex, } impl ChangeIdIndexImpl { pub fn new(index: I, heads: &mut dyn Iterator) -> ChangeIdIndexImpl { - // TODO: Calculate reachable bitset lazily. let composite = index.as_composite(); - let bitset_len = - usize::try_from(u32::div_ceil(composite.num_commits(), u64::BITS)).unwrap(); - let mut reachable_bitset = vec![0; bitset_len]; // request zeroed page - let head_positions = heads - .map(|id| composite.commit_id_to_pos(id).unwrap()) - .collect_vec(); - for entry in composite.walk_revs(&head_positions, &[]) { - let IndexPosition(pos) = entry.position(); - let bitset_pos = pos / u64::BITS; - let bit = 1_u64 << (pos % u64::BITS); - reachable_bitset[usize::try_from(bitset_pos).unwrap()] |= bit; + let mut reachable_set = AncestorsBitSet::with_capacity(composite.num_commits()); + for id in heads { + reachable_set.add_head(composite.commit_id_to_pos(id).unwrap()); } ChangeIdIndexImpl { index, - reachable_bitset, + reachable_set: Mutex::new(reachable_set), } } } @@ -534,14 +525,12 @@ impl ChangeIdIndex for ChangeIdIndexImpl { match index.resolve_change_id_prefix(prefix) { PrefixResolution::NoMatch => PrefixResolution::NoMatch, PrefixResolution::SingleMatch((_change_id, positions)) => { + debug_assert!(positions.iter().tuple_windows().all(|(a, b)| a < b)); + let mut reachable_set = self.reachable_set.lock().unwrap(); + reachable_set.visit_until(index, *positions.first().unwrap()); let reachable_commit_ids = positions .iter() - .filter(|IndexPosition(pos)| { - let bitset_pos = pos / u64::BITS; - let bit = 1_u64 << (pos % u64::BITS); - let bits = self.reachable_bitset[usize::try_from(bitset_pos).unwrap()]; - bits & bit != 0 - }) + .filter(|&&pos| reachable_set.contains(pos)) .map(|&pos| index.entry_by_pos(pos).commit_id()) .collect_vec(); if reachable_commit_ids.is_empty() { diff --git a/lib/src/default_index/rev_walk.rs b/lib/src/default_index/rev_walk.rs index bb8e9f855..79280af2c 100644 --- a/lib/src/default_index/rev_walk.rs +++ b/lib/src/default_index/rev_walk.rs @@ -503,6 +503,78 @@ impl<'a> Iterator for RevWalkDescendants<'a> { impl FusedIterator for RevWalkDescendants<'_> {} +/// Computes ancestors set lazily. +/// +/// This is similar to `RevWalk` functionality-wise, but implemented with the +/// different design goals: +/// +/// * lazy updates with no lifetimed fields +/// * optimized for dense ancestors set +/// * optimized for testing set membership +/// * no iterator API (which could be implemented on top) +#[derive(Clone, Debug)] +pub(super) struct AncestorsBitSet { + bitset: Vec, + last_visited_bitset_pos: u32, +} + +impl AncestorsBitSet { + /// Creates bit set of the specified capacity. + pub fn with_capacity(len: u32) -> Self { + let bitset_len = usize::try_from(u32::div_ceil(len, u64::BITS)).unwrap(); + AncestorsBitSet { + bitset: vec![0; bitset_len], // request zeroed page + last_visited_bitset_pos: 0, + } + } + + /// Adds head `pos` to the set. + /// + /// Panics if the `pos` exceeds the capacity. + pub fn add_head(&mut self, pos: IndexPosition) { + let bitset_pos = pos.0 / u64::BITS; + let bit = 1_u64 << (pos.0 % u64::BITS); + self.bitset[usize::try_from(bitset_pos).unwrap()] |= bit; + self.last_visited_bitset_pos = max(self.last_visited_bitset_pos, bitset_pos + 1); + } + + /// Returns `true` if the given `pos` is ancestors of the heads. + /// + /// Panics if the `pos` exceeds the capacity or has not been visited yet. + pub fn contains(&self, pos: IndexPosition) -> bool { + let bitset_pos = pos.0 / u64::BITS; + let bit = 1_u64 << (pos.0 % u64::BITS); + assert!(bitset_pos >= self.last_visited_bitset_pos); + self.bitset[usize::try_from(bitset_pos).unwrap()] & bit != 0 + } + + /// Updates set by visiting ancestors until the given `to_visit_pos`. + pub fn visit_until(&mut self, index: CompositeIndex, to_visit_pos: IndexPosition) { + let to_visit_bitset_pos = to_visit_pos.0 / u64::BITS; + if to_visit_bitset_pos >= self.last_visited_bitset_pos { + return; + } + for visiting_bitset_pos in (to_visit_bitset_pos..self.last_visited_bitset_pos).rev() { + let mut unvisited_bits = self.bitset[usize::try_from(visiting_bitset_pos).unwrap()]; + while unvisited_bits != 0 { + let bit_pos = u64::BITS - unvisited_bits.leading_zeros() - 1; // from MSB + unvisited_bits ^= 1_u64 << bit_pos; + let current_pos = IndexPosition(visiting_bitset_pos * u64::BITS + bit_pos); + for parent_pos in index.entry_by_pos(current_pos).parent_positions() { + assert!(parent_pos < current_pos); + let parent_bitset_pos = parent_pos.0 / u64::BITS; + let bit = 1_u64 << (parent_pos.0 % u64::BITS); + self.bitset[usize::try_from(parent_bitset_pos).unwrap()] |= bit; + if visiting_bitset_pos == parent_bitset_pos { + unvisited_bits |= bit; + } + } + } + } + self.last_visited_bitset_pos = to_visit_bitset_pos; + } +} + #[cfg(test)] mod tests { use itertools::Itertools as _; @@ -512,6 +584,12 @@ mod tests { use super::*; use crate::backend::{ChangeId, CommitId}; + /// Generator of unique 16-byte CommitId excluding root id + fn commit_id_generator() -> impl FnMut() -> CommitId { + let mut iter = (1_u128..).map(|n| CommitId::new(n.to_le_bytes().into())); + move || iter.next().unwrap() + } + /// Generator of unique 16-byte ChangeId excluding root id fn change_id_generator() -> impl FnMut() -> ChangeId { let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into())); @@ -864,4 +942,135 @@ mod tests { [&id_2, &id_4, &id_7].map(Clone::clone) ); } + + #[test] + fn test_ancestors_bit_set() { + let mut new_commit_id = commit_id_generator(); + let mut new_change_id = change_id_generator(); + let mut mutable_index = DefaultMutableIndex::full(16, 16); + + // F F = 256 + // |\ E = 193,194,195,..,254 + // E | D D = 192,255 + // | |/ C = 66,68,70,..,190 + // B C B = 65,67,69,..,189,191 + // |/ A = 0,1,2,..,64 + // A + let id_a0 = new_commit_id(); + mutable_index.add_commit_data(id_a0.clone(), new_change_id(), &[]); + let id_a64 = (1..=64).fold(id_a0.clone(), |parent_id, i| { + assert_eq!(mutable_index.as_composite().num_commits(), i); + let id = new_commit_id(); + mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]); + id + }); + let (id_b189, id_c190) = (65..=190).step_by(2).fold( + (id_a64.clone(), id_a64.clone()), + |(parent_id_b, parent_id_c), i| { + assert_eq!(mutable_index.as_composite().num_commits(), i); + let id_b = new_commit_id(); + let id_c = new_commit_id(); + mutable_index.add_commit_data(id_b.clone(), new_change_id(), &[parent_id_b]); + mutable_index.add_commit_data(id_c.clone(), new_change_id(), &[parent_id_c]); + (id_b, id_c) + }, + ); + let id_b191 = new_commit_id(); + mutable_index.add_commit_data(id_b191.clone(), new_change_id(), &[id_b189]); + let id_d192 = new_commit_id(); + mutable_index.add_commit_data(id_d192.clone(), new_change_id(), &[id_c190.clone()]); + let id_e254 = (193..=254).fold(id_b191.clone(), |parent_id, i| { + assert_eq!(mutable_index.as_composite().num_commits(), i); + let id = new_commit_id(); + mutable_index.add_commit_data(id.clone(), new_change_id(), &[parent_id]); + id + }); + let id_d255 = new_commit_id(); + mutable_index.add_commit_data(id_d255.clone(), new_change_id(), &[id_d192.clone()]); + let id_f256 = new_commit_id(); + mutable_index.add_commit_data( + id_f256.clone(), + new_change_id(), + &[id_c190.clone(), id_e254.clone()], + ); + assert_eq!(mutable_index.as_composite().num_commits(), 257); + + let index = mutable_index.as_composite(); + let to_pos = |id: &CommitId| index.commit_id_to_pos(id).unwrap(); + let new_ancestors_set = |heads: &[&CommitId]| { + let mut set = AncestorsBitSet::with_capacity(index.num_commits()); + for &id in heads { + set.add_head(to_pos(id)); + } + set + }; + + // Nothing reachable + let set = new_ancestors_set(&[]); + assert_eq!(set.last_visited_bitset_pos, 0); + for pos in (0..=256).map(IndexPosition) { + assert!(!set.contains(pos), "{pos:?} should be unreachable"); + } + + // All reachable + let mut set = new_ancestors_set(&[&id_f256, &id_d255]); + assert_eq!(set.last_visited_bitset_pos, 5); + set.visit_until(index, to_pos(&id_f256)); + assert_eq!(set.last_visited_bitset_pos, 4); + assert!(set.contains(to_pos(&id_f256))); + set.visit_until(index, to_pos(&id_d192)); + assert_eq!(set.last_visited_bitset_pos, 3); + assert!(set.contains(to_pos(&id_e254))); + assert!(set.contains(to_pos(&id_d255))); + assert!(set.contains(to_pos(&id_d192))); + set.visit_until(index, to_pos(&id_a0)); + assert_eq!(set.last_visited_bitset_pos, 0); + set.visit_until(index, to_pos(&id_f256)); // should be noop + assert_eq!(set.last_visited_bitset_pos, 0); + for pos in (0..=256).map(IndexPosition) { + assert!(set.contains(pos), "{pos:?} should be reachable"); + } + + // A, B, C, E, F are reachable + let mut set = new_ancestors_set(&[&id_f256]); + assert_eq!(set.last_visited_bitset_pos, 5); + set.visit_until(index, to_pos(&id_f256)); + assert_eq!(set.last_visited_bitset_pos, 4); + assert!(set.contains(to_pos(&id_f256))); + set.visit_until(index, to_pos(&id_d192)); + assert_eq!(set.last_visited_bitset_pos, 3); + assert!(!set.contains(to_pos(&id_d255))); + assert!(!set.contains(to_pos(&id_d192))); + set.visit_until(index, to_pos(&id_c190)); + assert_eq!(set.last_visited_bitset_pos, 2); + assert!(set.contains(to_pos(&id_c190))); + set.visit_until(index, to_pos(&id_a64)); + assert_eq!(set.last_visited_bitset_pos, 1); + assert!(set.contains(to_pos(&id_b191))); + assert!(set.contains(to_pos(&id_a64))); + set.visit_until(index, to_pos(&id_a0)); + assert_eq!(set.last_visited_bitset_pos, 0); + assert!(set.contains(to_pos(&id_a0))); + + // A, C, D are reachable + let mut set = new_ancestors_set(&[&id_d255]); + assert_eq!(set.last_visited_bitset_pos, 4); + assert!(!set.contains(to_pos(&id_f256))); + set.visit_until(index, to_pos(&id_e254)); + assert_eq!(set.last_visited_bitset_pos, 3); + assert!(!set.contains(to_pos(&id_e254))); + set.visit_until(index, to_pos(&id_d255)); + assert_eq!(set.last_visited_bitset_pos, 3); + assert!(set.contains(to_pos(&id_d255))); + set.visit_until(index, to_pos(&id_b191)); + assert_eq!(set.last_visited_bitset_pos, 2); + assert!(!set.contains(to_pos(&id_b191))); + set.visit_until(index, to_pos(&id_c190)); + assert_eq!(set.last_visited_bitset_pos, 2); + assert!(set.contains(to_pos(&id_c190))); + set.visit_until(index, to_pos(&id_a0)); + assert_eq!(set.last_visited_bitset_pos, 0); + assert!(set.contains(to_pos(&id_a64))); + assert!(set.contains(to_pos(&id_a0))); + } }