From f73e590837a5f1e18e213931c4a913f70ddd5fb0 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Fri, 22 Dec 2023 16:02:36 +0900 Subject: [PATCH] index: implement segment-level change id lookup methods In resolve_change_id_prefix(), I've implemented two different ways of collecting the overflow items. I don't think they impact the performance, but we can switch to the alternative method as needed. --- lib/src/default_index/composite.rs | 11 ++ lib/src/default_index/mod.rs | 204 ++++++++++++++++++++++++++++- lib/src/default_index/mutable.rs | 26 +++- lib/src/default_index/readonly.rs | 98 +++++++++++++- 4 files changed, 330 insertions(+), 9 deletions(-) diff --git a/lib/src/default_index/composite.rs b/lib/src/default_index/composite.rs index c4fbf5fd9..79f2aa6fc 100644 --- a/lib/src/default_index/composite.rs +++ b/lib/src/default_index/composite.rs @@ -23,6 +23,7 @@ use itertools::Itertools; use super::entry::{ IndexEntry, IndexPosition, IndexPositionByGeneration, LocalPosition, SmallIndexPositionsVec, + SmallLocalPositionsVec, }; use super::readonly::ReadonlyIndexSegment; use super::rev_walk::RevWalk; @@ -55,6 +56,16 @@ pub(super) trait IndexSegment: Send + Sync { fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution; + fn resolve_neighbor_change_ids( + &self, + change_id: &ChangeId, + ) -> (Option, Option); + + fn resolve_change_id_prefix( + &self, + prefix: &HexPrefix, + ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)>; + fn generation_number(&self, local_pos: LocalPosition) -> u32; fn commit_id(&self, local_pos: LocalPosition) -> CommitId; diff --git a/lib/src/default_index/mod.rs b/lib/src/default_index/mod.rs index 1b94ac970..a11c710cf 100644 --- a/lib/src/default_index/mod.rs +++ b/lib/src/default_index/mod.rs @@ -46,9 +46,16 @@ mod tests { use super::mutable::MutableIndexSegment; use super::*; use crate::backend::{ChangeId, CommitId}; + use crate::default_index::entry::{LocalPosition, SmallLocalPositionsVec}; use crate::index::Index; use crate::object_id::{HexPrefix, ObjectId, PrefixResolution}; + /// Generator of unique 16-byte CommitId excluding root id + fn commit_id_generator() -> impl FnMut() -> CommitId { + let mut iter = (1_u128..).map(|n| CommitId::new(n.to_le_bytes().into())); + move || iter.next().unwrap() + } + /// Generator of unique 16-byte ChangeId excluding root id fn change_id_generator() -> impl FnMut() -> ChangeId { let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into())); @@ -328,7 +335,7 @@ mod tests { } #[test] - fn resolve_prefix() { + fn resolve_commit_id_prefix() { let temp_dir = testutils::new_temp_dir(); let mut new_change_id = change_id_generator(); let mut mutable_segment = MutableIndexSegment::full(3, 16); @@ -581,6 +588,201 @@ mod tests { ); } + #[test] + fn resolve_change_id_prefix() { + let temp_dir = testutils::new_temp_dir(); + let mut new_commit_id = commit_id_generator(); + let local_positions_vec = |positions: &[u32]| -> SmallLocalPositionsVec { + positions.iter().copied().map(LocalPosition).collect() + }; + + let id_0 = ChangeId::from_hex("00000001"); + let id_1 = ChangeId::from_hex("00999999"); + let id_2 = ChangeId::from_hex("05548888"); + let id_3 = ChangeId::from_hex("05544444"); + let id_4 = ChangeId::from_hex("05555555"); + let id_5 = ChangeId::from_hex("05555333"); + + // Create some commits with different various common prefixes. + let mut mutable_segment = MutableIndexSegment::full(16, 4); + mutable_segment.add_commit_data(new_commit_id(), id_0.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + + // Write these commits to one file and build the remainder on top. + let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap(); + mutable_segment = MutableIndexSegment::incremental(initial_file.clone()); + + mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_4.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_5.clone(), &[]); + + // Local lookup in readonly index with the full hex digits + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_0.hex()).unwrap()), + PrefixResolution::SingleMatch((id_0.clone(), local_positions_vec(&[0]))) + ); + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()), + PrefixResolution::SingleMatch((id_1.clone(), local_positions_vec(&[1, 3]))) + ); + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_2.hex()).unwrap()), + PrefixResolution::SingleMatch((id_2.clone(), local_positions_vec(&[2, 4, 5]))) + ); + + // Local lookup in mutable index with the full hex digits + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()), + PrefixResolution::SingleMatch((id_1.clone(), local_positions_vec(&[3]))) + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_3.hex()).unwrap()), + PrefixResolution::SingleMatch((id_3.clone(), local_positions_vec(&[0, 1]))) + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_4.hex()).unwrap()), + PrefixResolution::SingleMatch((id_4.clone(), local_positions_vec(&[2]))) + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_5.hex()).unwrap()), + PrefixResolution::SingleMatch((id_5.clone(), local_positions_vec(&[4]))) + ); + + // Local lookup with locally unknown prefix + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new("0555").unwrap()), + PrefixResolution::NoMatch + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new("000").unwrap()), + PrefixResolution::NoMatch + ); + + // Local lookup with locally unique prefix + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()), + PrefixResolution::SingleMatch((id_2.clone(), local_positions_vec(&[2, 4, 5]))) + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()), + PrefixResolution::SingleMatch((id_3.clone(), local_positions_vec(&[0, 1]))) + ); + + // Local lookup with locally ambiguous prefix + assert_eq!( + initial_file.resolve_change_id_prefix(&HexPrefix::new("00").unwrap()), + PrefixResolution::AmbiguousMatch + ); + assert_eq!( + mutable_segment.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()), + PrefixResolution::AmbiguousMatch + ); + } + + #[test] + fn neighbor_change_ids() { + let temp_dir = testutils::new_temp_dir(); + let mut new_commit_id = commit_id_generator(); + + let id_0 = ChangeId::from_hex("00000001"); + let id_1 = ChangeId::from_hex("00999999"); + let id_2 = ChangeId::from_hex("05548888"); + let id_3 = ChangeId::from_hex("05544444"); + let id_4 = ChangeId::from_hex("05555555"); + let id_5 = ChangeId::from_hex("05555333"); + + // Create some commits with different various common prefixes. + let mut mutable_segment = MutableIndexSegment::full(16, 4); + mutable_segment.add_commit_data(new_commit_id(), id_0.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]); + + // Write these commits to one file and build the remainder on top. + let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap(); + mutable_segment = MutableIndexSegment::incremental(initial_file.clone()); + + mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_4.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]); + mutable_segment.add_commit_data(new_commit_id(), id_5.clone(), &[]); + + // Local lookup in readonly index, change_id exists. + assert_eq!( + initial_file.resolve_neighbor_change_ids(&id_0), + (None, Some(id_1.clone())), + ); + assert_eq!( + initial_file.resolve_neighbor_change_ids(&id_1), + (Some(id_0.clone()), Some(id_2.clone())), + ); + assert_eq!( + initial_file.resolve_neighbor_change_ids(&id_2), + (Some(id_1.clone()), None), + ); + + // Local lookup in readonly index, change_id does not exist. + assert_eq!( + initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("00000000")), + (None, Some(id_0.clone())), + ); + assert_eq!( + initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("00000002")), + (Some(id_0.clone()), Some(id_1.clone())), + ); + assert_eq!( + initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")), + (Some(id_2.clone()), None), + ); + + // Local lookup in mutable index, change_id exists. + // id_1 < id_3 < id_5 < id_4 + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&id_1), + (None, Some(id_3.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&id_3), + (Some(id_1.clone()), Some(id_5.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&id_5), + (Some(id_3.clone()), Some(id_4.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&id_4), + (Some(id_5.clone()), None), + ); + + // Local lookup in mutable index, change_id does not exist. + // id_1 < id_3 < id_5 < id_4 + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("00999998")), + (None, Some(id_1.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("01000000")), + (Some(id_1.clone()), Some(id_3.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("05555332")), + (Some(id_3.clone()), Some(id_5.clone())), + ); + assert_eq!( + mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")), + (Some(id_4.clone()), None), + ); + } + #[test] fn test_is_ancestor() { let mut new_change_id = change_id_generator(); diff --git a/lib/src/default_index/mutable.rs b/lib/src/default_index/mutable.rs index 3153feb25..d979af78d 100644 --- a/lib/src/default_index/mutable.rs +++ b/lib/src/default_index/mutable.rs @@ -391,7 +391,24 @@ impl IndexSegment for MutableIndexSegment { fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes()); - resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|id| id.clone()) + resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone()) + } + + fn resolve_neighbor_change_ids( + &self, + change_id: &ChangeId, + ) -> (Option, Option) { + let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id); + (prev_id.cloned(), next_id.cloned()) + } + + fn resolve_change_id_prefix( + &self, + prefix: &HexPrefix, + ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> { + let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes()); + resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix) + .map(|(id, positions)| (id.clone(), positions.clone())) } fn generation_number(&self, local_pos: LocalPosition) -> u32 { @@ -551,14 +568,13 @@ fn resolve_id_prefix<'a, K: ObjectId + Ord, V>( lookup_table: &'a BTreeMap, prefix: &HexPrefix, min_bytes_prefix: &K, -) -> PrefixResolution<&'a K> { +) -> PrefixResolution<(&'a K, &'a V)> { let mut matches = lookup_table .range((Bound::Included(min_bytes_prefix), Bound::Unbounded)) - .map(|(id, _pos)| id) - .take_while(|&id| prefix.matches(id)) + .take_while(|&(id, _)| prefix.matches(id)) .fuse(); match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id), + (Some(entry), None) => PrefixResolution::SingleMatch(entry), (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, (None, _) => PrefixResolution::NoMatch, } diff --git a/lib/src/default_index/readonly.rs b/lib/src/default_index/readonly.rs index 656578d71..688729ff1 100644 --- a/lib/src/default_index/readonly.rs +++ b/lib/src/default_index/readonly.rs @@ -27,7 +27,7 @@ use smallvec::smallvec; use thiserror::Error; use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment}; -use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec}; +use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec, SmallLocalPositionsVec}; use super::mutable::DefaultMutableIndex; use crate::backend::{ChangeId, CommitId}; use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index, MutableIndex, ReadonlyIndex}; @@ -92,6 +92,20 @@ impl ParentIndexPosition { } } +/// Local position of entry pointed by change id, or overflow pointer. +#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] +struct ChangeLocalPosition(u32); + +impl ChangeLocalPosition { + fn as_inlined(self) -> Option { + (self.0 & OVERFLOW_FLAG == 0).then_some(LocalPosition(self.0)) + } + + fn as_overflow(self) -> Option { + (self.0 & OVERFLOW_FLAG != 0).then_some(!self.0) + } +} + struct CommitGraphEntry<'a> { data: &'a [u8], } @@ -206,6 +220,7 @@ pub(super) struct ReadonlyIndexSegment { num_local_commits: u32, num_local_change_ids: u32, num_parent_overflow_entries: u32, + num_change_overflow_entries: u32, data: Vec, } @@ -334,6 +349,7 @@ impl ReadonlyIndexSegment { num_local_commits, num_local_change_ids, num_parent_overflow_entries, + num_change_overflow_entries, data, })) } @@ -385,6 +401,17 @@ impl ReadonlyIndexSegment { &self.data[offset..][..self.change_id_length] } + fn change_lookup_pos(&self, lookup_pos: u32) -> ChangeLocalPosition { + assert!(lookup_pos < self.num_local_change_ids); + let offset = (lookup_pos as usize) * 4 + + (self.num_local_commits as usize) * self.commit_graph_entry_size + + (self.num_local_commits as usize) * self.commit_lookup_entry_size + + (self.num_local_change_ids as usize) * self.change_id_length; + ChangeLocalPosition(u32::from_le_bytes( + self.data[offset..][..4].try_into().unwrap(), + )) + } + fn overflow_parents(&self, overflow_pos: u32, num_parents: u32) -> SmallIndexPositionsVec { assert!(overflow_pos + num_parents <= self.num_parent_overflow_entries); let offset = (overflow_pos as usize) * 4 @@ -398,6 +425,20 @@ impl ReadonlyIndexSegment { .collect() } + /// Scans graph entry positions stored in the overflow change ids table. + fn overflow_changes_from(&self, overflow_pos: u32) -> impl Iterator + '_ { + let base = (self.num_local_commits as usize) * self.commit_graph_entry_size + + (self.num_local_commits as usize) * self.commit_lookup_entry_size + + (self.num_local_change_ids as usize) * self.change_id_length + + (self.num_local_change_ids as usize) * 4 + + (self.num_parent_overflow_entries as usize) * 4; + let size = (self.num_change_overflow_entries as usize) * 4; + let offset = (overflow_pos as usize) * 4; + self.data[base..][..size][offset..] + .chunks_exact(4) + .map(|chunk| LocalPosition(u32::from_le_bytes(chunk.try_into().unwrap()))) + } + /// Binary searches commit id by `prefix`. Returns the lookup position. fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult { binary_search_pos_by(self.num_local_commits, |pos| { @@ -405,6 +446,14 @@ impl ReadonlyIndexSegment { entry.commit_id_bytes().cmp(prefix) }) } + + /// Binary searches change id by `prefix`. Returns the lookup position. + fn change_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult { + binary_search_pos_by(self.num_local_change_ids, |pos| { + let change_id_bytes = self.change_lookup_id_bytes(pos); + change_id_bytes.cmp(prefix) + }) + } } impl IndexSegment for ReadonlyIndexSegment { @@ -443,6 +492,49 @@ impl IndexSegment for ReadonlyIndexSegment { fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution { self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes()) .prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id()) + .map(|(id, _)| id) + } + + fn resolve_neighbor_change_ids( + &self, + change_id: &ChangeId, + ) -> (Option, Option) { + self.change_id_byte_prefix_to_lookup_pos(change_id.as_bytes()) + .map_neighbors(|pos| self.change_lookup_id(pos)) + } + + fn resolve_change_id_prefix( + &self, + prefix: &HexPrefix, + ) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> { + self.change_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes()) + .prefix_matches(prefix, |pos| self.change_lookup_id(pos)) + .map(|(id, lookup_pos)| { + let change_pos = self.change_lookup_pos(lookup_pos); + if let Some(local_pos) = change_pos.as_inlined() { + (id, smallvec![local_pos]) + } else { + let overflow_pos = change_pos.as_overflow().unwrap(); + // Collect commits having the same change id. For cache + // locality, it might be better to look for the next few + // change id positions to determine the size. + let positions: SmallLocalPositionsVec = self + .overflow_changes_from(overflow_pos) + .take_while(|&local_pos| { + let entry = self.graph_entry(local_pos); + entry.change_id_lookup_pos() == lookup_pos + }) + .collect(); + debug_assert_eq!( + overflow_pos + u32::try_from(positions.len()).unwrap(), + (lookup_pos + 1..self.num_local_change_ids) + .find_map(|lookup_pos| self.change_lookup_pos(lookup_pos).as_overflow()) + .unwrap_or(self.num_change_overflow_entries), + "all overflow positions to the next change id should be collected" + ); + (id, positions) + } + }) } fn generation_number(&self, local_pos: LocalPosition) -> u32 { @@ -609,14 +701,14 @@ impl PositionLookupResult { self, prefix: &HexPrefix, lookup: impl FnMut(u32) -> T, - ) -> PrefixResolution { + ) -> PrefixResolution<(T, u32)> { let lookup_pos = self.result.unwrap_or_else(|pos| pos); let mut matches = (lookup_pos..self.size) .map(lookup) .take_while(|id| prefix.matches(id)) .fuse(); match (matches.next(), matches.next()) { - (Some(id), None) => PrefixResolution::SingleMatch(id), + (Some(id), None) => PrefixResolution::SingleMatch((id, lookup_pos)), (Some(_), Some(_)) => PrefixResolution::AmbiguousMatch, (None, _) => PrefixResolution::NoMatch, }