index: implement segment-level change id lookup methods

In resolve_change_id_prefix(), I've implemented two different ways of
collecting the overflow items. I don't think they impact the performance,
but we can switch to the alternative method as needed.
This commit is contained in:
Yuya Nishihara 2023-12-22 16:02:36 +09:00
parent 8cdf6d752c
commit f73e590837
4 changed files with 330 additions and 9 deletions

View file

@ -23,6 +23,7 @@ use itertools::Itertools;
use super::entry::{
IndexEntry, IndexPosition, IndexPositionByGeneration, LocalPosition, SmallIndexPositionsVec,
SmallLocalPositionsVec,
};
use super::readonly::ReadonlyIndexSegment;
use super::rev_walk::RevWalk;
@ -55,6 +56,16 @@ pub(super) trait IndexSegment: Send + Sync {
fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId>;
fn resolve_neighbor_change_ids(
&self,
change_id: &ChangeId,
) -> (Option<ChangeId>, Option<ChangeId>);
fn resolve_change_id_prefix(
&self,
prefix: &HexPrefix,
) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)>;
fn generation_number(&self, local_pos: LocalPosition) -> u32;
fn commit_id(&self, local_pos: LocalPosition) -> CommitId;

View file

@ -46,9 +46,16 @@ mod tests {
use super::mutable::MutableIndexSegment;
use super::*;
use crate::backend::{ChangeId, CommitId};
use crate::default_index::entry::{LocalPosition, SmallLocalPositionsVec};
use crate::index::Index;
use crate::object_id::{HexPrefix, ObjectId, PrefixResolution};
/// Generator of unique 16-byte CommitId excluding root id
fn commit_id_generator() -> impl FnMut() -> CommitId {
let mut iter = (1_u128..).map(|n| CommitId::new(n.to_le_bytes().into()));
move || iter.next().unwrap()
}
/// Generator of unique 16-byte ChangeId excluding root id
fn change_id_generator() -> impl FnMut() -> ChangeId {
let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into()));
@ -328,7 +335,7 @@ mod tests {
}
#[test]
fn resolve_prefix() {
fn resolve_commit_id_prefix() {
let temp_dir = testutils::new_temp_dir();
let mut new_change_id = change_id_generator();
let mut mutable_segment = MutableIndexSegment::full(3, 16);
@ -581,6 +588,201 @@ mod tests {
);
}
#[test]
fn resolve_change_id_prefix() {
let temp_dir = testutils::new_temp_dir();
let mut new_commit_id = commit_id_generator();
let local_positions_vec = |positions: &[u32]| -> SmallLocalPositionsVec {
positions.iter().copied().map(LocalPosition).collect()
};
let id_0 = ChangeId::from_hex("00000001");
let id_1 = ChangeId::from_hex("00999999");
let id_2 = ChangeId::from_hex("05548888");
let id_3 = ChangeId::from_hex("05544444");
let id_4 = ChangeId::from_hex("05555555");
let id_5 = ChangeId::from_hex("05555333");
// Create some commits with different various common prefixes.
let mut mutable_segment = MutableIndexSegment::full(16, 4);
mutable_segment.add_commit_data(new_commit_id(), id_0.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
// Write these commits to one file and build the remainder on top.
let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap();
mutable_segment = MutableIndexSegment::incremental(initial_file.clone());
mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_4.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_5.clone(), &[]);
// Local lookup in readonly index with the full hex digits
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_0.hex()).unwrap()),
PrefixResolution::SingleMatch((id_0.clone(), local_positions_vec(&[0])))
);
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()),
PrefixResolution::SingleMatch((id_1.clone(), local_positions_vec(&[1, 3])))
);
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new(&id_2.hex()).unwrap()),
PrefixResolution::SingleMatch((id_2.clone(), local_positions_vec(&[2, 4, 5])))
);
// Local lookup in mutable index with the full hex digits
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_1.hex()).unwrap()),
PrefixResolution::SingleMatch((id_1.clone(), local_positions_vec(&[3])))
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_3.hex()).unwrap()),
PrefixResolution::SingleMatch((id_3.clone(), local_positions_vec(&[0, 1])))
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_4.hex()).unwrap()),
PrefixResolution::SingleMatch((id_4.clone(), local_positions_vec(&[2])))
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new(&id_5.hex()).unwrap()),
PrefixResolution::SingleMatch((id_5.clone(), local_positions_vec(&[4])))
);
// Local lookup with locally unknown prefix
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new("0555").unwrap()),
PrefixResolution::NoMatch
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new("000").unwrap()),
PrefixResolution::NoMatch
);
// Local lookup with locally unique prefix
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()),
PrefixResolution::SingleMatch((id_2.clone(), local_positions_vec(&[2, 4, 5])))
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new("0554").unwrap()),
PrefixResolution::SingleMatch((id_3.clone(), local_positions_vec(&[0, 1])))
);
// Local lookup with locally ambiguous prefix
assert_eq!(
initial_file.resolve_change_id_prefix(&HexPrefix::new("00").unwrap()),
PrefixResolution::AmbiguousMatch
);
assert_eq!(
mutable_segment.resolve_change_id_prefix(&HexPrefix::new("05555").unwrap()),
PrefixResolution::AmbiguousMatch
);
}
#[test]
fn neighbor_change_ids() {
let temp_dir = testutils::new_temp_dir();
let mut new_commit_id = commit_id_generator();
let id_0 = ChangeId::from_hex("00000001");
let id_1 = ChangeId::from_hex("00999999");
let id_2 = ChangeId::from_hex("05548888");
let id_3 = ChangeId::from_hex("05544444");
let id_4 = ChangeId::from_hex("05555555");
let id_5 = ChangeId::from_hex("05555333");
// Create some commits with different various common prefixes.
let mut mutable_segment = MutableIndexSegment::full(16, 4);
mutable_segment.add_commit_data(new_commit_id(), id_0.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_2.clone(), &[]);
// Write these commits to one file and build the remainder on top.
let initial_file = mutable_segment.save_in(temp_dir.path()).unwrap();
mutable_segment = MutableIndexSegment::incremental(initial_file.clone());
mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_3.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_4.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_1.clone(), &[]);
mutable_segment.add_commit_data(new_commit_id(), id_5.clone(), &[]);
// Local lookup in readonly index, change_id exists.
assert_eq!(
initial_file.resolve_neighbor_change_ids(&id_0),
(None, Some(id_1.clone())),
);
assert_eq!(
initial_file.resolve_neighbor_change_ids(&id_1),
(Some(id_0.clone()), Some(id_2.clone())),
);
assert_eq!(
initial_file.resolve_neighbor_change_ids(&id_2),
(Some(id_1.clone()), None),
);
// Local lookup in readonly index, change_id does not exist.
assert_eq!(
initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("00000000")),
(None, Some(id_0.clone())),
);
assert_eq!(
initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("00000002")),
(Some(id_0.clone()), Some(id_1.clone())),
);
assert_eq!(
initial_file.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
(Some(id_2.clone()), None),
);
// Local lookup in mutable index, change_id exists.
// id_1 < id_3 < id_5 < id_4
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&id_1),
(None, Some(id_3.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&id_3),
(Some(id_1.clone()), Some(id_5.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&id_5),
(Some(id_3.clone()), Some(id_4.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&id_4),
(Some(id_5.clone()), None),
);
// Local lookup in mutable index, change_id does not exist.
// id_1 < id_3 < id_5 < id_4
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("00999998")),
(None, Some(id_1.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("01000000")),
(Some(id_1.clone()), Some(id_3.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("05555332")),
(Some(id_3.clone()), Some(id_5.clone())),
);
assert_eq!(
mutable_segment.resolve_neighbor_change_ids(&ChangeId::from_hex("ffffffff")),
(Some(id_4.clone()), None),
);
}
#[test]
fn test_is_ancestor() {
let mut new_change_id = change_id_generator();

View file

@ -391,7 +391,24 @@ impl IndexSegment for MutableIndexSegment {
fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
let min_bytes_prefix = CommitId::from_bytes(prefix.min_prefix_bytes());
resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|id| id.clone())
resolve_id_prefix(&self.commit_lookup, prefix, &min_bytes_prefix).map(|(id, _)| id.clone())
}
fn resolve_neighbor_change_ids(
&self,
change_id: &ChangeId,
) -> (Option<ChangeId>, Option<ChangeId>) {
let (prev_id, next_id) = resolve_neighbor_ids(&self.change_lookup, change_id);
(prev_id.cloned(), next_id.cloned())
}
fn resolve_change_id_prefix(
&self,
prefix: &HexPrefix,
) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> {
let min_bytes_prefix = ChangeId::from_bytes(prefix.min_prefix_bytes());
resolve_id_prefix(&self.change_lookup, prefix, &min_bytes_prefix)
.map(|(id, positions)| (id.clone(), positions.clone()))
}
fn generation_number(&self, local_pos: LocalPosition) -> u32 {
@ -551,14 +568,13 @@ fn resolve_id_prefix<'a, K: ObjectId + Ord, V>(
lookup_table: &'a BTreeMap<K, V>,
prefix: &HexPrefix,
min_bytes_prefix: &K,
) -> PrefixResolution<&'a K> {
) -> PrefixResolution<(&'a K, &'a V)> {
let mut matches = lookup_table
.range((Bound::Included(min_bytes_prefix), Bound::Unbounded))
.map(|(id, _pos)| id)
.take_while(|&id| prefix.matches(id))
.take_while(|&(id, _)| prefix.matches(id))
.fuse();
match (matches.next(), matches.next()) {
(Some(id), None) => PrefixResolution::SingleMatch(id),
(Some(entry), None) => PrefixResolution::SingleMatch(entry),
(Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
(None, _) => PrefixResolution::NoMatch,
}

View file

@ -27,7 +27,7 @@ use smallvec::smallvec;
use thiserror::Error;
use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, IndexSegment};
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec, SmallLocalPositionsVec};
use super::mutable::DefaultMutableIndex;
use crate::backend::{ChangeId, CommitId};
use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index, MutableIndex, ReadonlyIndex};
@ -92,6 +92,20 @@ impl ParentIndexPosition {
}
}
/// Local position of entry pointed by change id, or overflow pointer.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct ChangeLocalPosition(u32);
impl ChangeLocalPosition {
fn as_inlined(self) -> Option<LocalPosition> {
(self.0 & OVERFLOW_FLAG == 0).then_some(LocalPosition(self.0))
}
fn as_overflow(self) -> Option<u32> {
(self.0 & OVERFLOW_FLAG != 0).then_some(!self.0)
}
}
struct CommitGraphEntry<'a> {
data: &'a [u8],
}
@ -206,6 +220,7 @@ pub(super) struct ReadonlyIndexSegment {
num_local_commits: u32,
num_local_change_ids: u32,
num_parent_overflow_entries: u32,
num_change_overflow_entries: u32,
data: Vec<u8>,
}
@ -334,6 +349,7 @@ impl ReadonlyIndexSegment {
num_local_commits,
num_local_change_ids,
num_parent_overflow_entries,
num_change_overflow_entries,
data,
}))
}
@ -385,6 +401,17 @@ impl ReadonlyIndexSegment {
&self.data[offset..][..self.change_id_length]
}
fn change_lookup_pos(&self, lookup_pos: u32) -> ChangeLocalPosition {
assert!(lookup_pos < self.num_local_change_ids);
let offset = (lookup_pos as usize) * 4
+ (self.num_local_commits as usize) * self.commit_graph_entry_size
+ (self.num_local_commits as usize) * self.commit_lookup_entry_size
+ (self.num_local_change_ids as usize) * self.change_id_length;
ChangeLocalPosition(u32::from_le_bytes(
self.data[offset..][..4].try_into().unwrap(),
))
}
fn overflow_parents(&self, overflow_pos: u32, num_parents: u32) -> SmallIndexPositionsVec {
assert!(overflow_pos + num_parents <= self.num_parent_overflow_entries);
let offset = (overflow_pos as usize) * 4
@ -398,6 +425,20 @@ impl ReadonlyIndexSegment {
.collect()
}
/// Scans graph entry positions stored in the overflow change ids table.
fn overflow_changes_from(&self, overflow_pos: u32) -> impl Iterator<Item = LocalPosition> + '_ {
let base = (self.num_local_commits as usize) * self.commit_graph_entry_size
+ (self.num_local_commits as usize) * self.commit_lookup_entry_size
+ (self.num_local_change_ids as usize) * self.change_id_length
+ (self.num_local_change_ids as usize) * 4
+ (self.num_parent_overflow_entries as usize) * 4;
let size = (self.num_change_overflow_entries as usize) * 4;
let offset = (overflow_pos as usize) * 4;
self.data[base..][..size][offset..]
.chunks_exact(4)
.map(|chunk| LocalPosition(u32::from_le_bytes(chunk.try_into().unwrap())))
}
/// Binary searches commit id by `prefix`. Returns the lookup position.
fn commit_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
binary_search_pos_by(self.num_local_commits, |pos| {
@ -405,6 +446,14 @@ impl ReadonlyIndexSegment {
entry.commit_id_bytes().cmp(prefix)
})
}
/// Binary searches change id by `prefix`. Returns the lookup position.
fn change_id_byte_prefix_to_lookup_pos(&self, prefix: &[u8]) -> PositionLookupResult {
binary_search_pos_by(self.num_local_change_ids, |pos| {
let change_id_bytes = self.change_lookup_id_bytes(pos);
change_id_bytes.cmp(prefix)
})
}
}
impl IndexSegment for ReadonlyIndexSegment {
@ -443,6 +492,49 @@ impl IndexSegment for ReadonlyIndexSegment {
fn resolve_commit_id_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<CommitId> {
self.commit_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
.prefix_matches(prefix, |pos| self.commit_lookup_entry(pos).commit_id())
.map(|(id, _)| id)
}
fn resolve_neighbor_change_ids(
&self,
change_id: &ChangeId,
) -> (Option<ChangeId>, Option<ChangeId>) {
self.change_id_byte_prefix_to_lookup_pos(change_id.as_bytes())
.map_neighbors(|pos| self.change_lookup_id(pos))
}
fn resolve_change_id_prefix(
&self,
prefix: &HexPrefix,
) -> PrefixResolution<(ChangeId, SmallLocalPositionsVec)> {
self.change_id_byte_prefix_to_lookup_pos(prefix.min_prefix_bytes())
.prefix_matches(prefix, |pos| self.change_lookup_id(pos))
.map(|(id, lookup_pos)| {
let change_pos = self.change_lookup_pos(lookup_pos);
if let Some(local_pos) = change_pos.as_inlined() {
(id, smallvec![local_pos])
} else {
let overflow_pos = change_pos.as_overflow().unwrap();
// Collect commits having the same change id. For cache
// locality, it might be better to look for the next few
// change id positions to determine the size.
let positions: SmallLocalPositionsVec = self
.overflow_changes_from(overflow_pos)
.take_while(|&local_pos| {
let entry = self.graph_entry(local_pos);
entry.change_id_lookup_pos() == lookup_pos
})
.collect();
debug_assert_eq!(
overflow_pos + u32::try_from(positions.len()).unwrap(),
(lookup_pos + 1..self.num_local_change_ids)
.find_map(|lookup_pos| self.change_lookup_pos(lookup_pos).as_overflow())
.unwrap_or(self.num_change_overflow_entries),
"all overflow positions to the next change id should be collected"
);
(id, positions)
}
})
}
fn generation_number(&self, local_pos: LocalPosition) -> u32 {
@ -609,14 +701,14 @@ impl PositionLookupResult {
self,
prefix: &HexPrefix,
lookup: impl FnMut(u32) -> T,
) -> PrefixResolution<T> {
) -> PrefixResolution<(T, u32)> {
let lookup_pos = self.result.unwrap_or_else(|pos| pos);
let mut matches = (lookup_pos..self.size)
.map(lookup)
.take_while(|id| prefix.matches(id))
.fuse();
match (matches.next(), matches.next()) {
(Some(id), None) => PrefixResolution::SingleMatch(id),
(Some(id), None) => PrefixResolution::SingleMatch((id, lookup_pos)),
(Some(_), Some(_)) => PrefixResolution::AmbiguousMatch,
(None, _) => PrefixResolution::NoMatch,
}