repo: generalize IdIndex over key and value types

Though we'll only need IdIndex<ChangeId, IndexPosition> in practice, making
IdIndex generic allows us to write unit tests without setting up a MutableIndex.
This commit is contained in:
Yuya Nishihara 2023-01-24 17:13:52 +09:00
parent 8c5a14d28a
commit 38a9180bb7

View file

@ -108,7 +108,7 @@ pub struct ReadonlyRepo {
index_store: Arc<IndexStore>, index_store: Arc<IndexStore>,
index: OnceCell<Arc<ReadonlyIndex>>, index: OnceCell<Arc<ReadonlyIndex>>,
// TODO: This should eventually become part of the index and not be stored fully in memory. // TODO: This should eventually become part of the index and not be stored fully in memory.
change_id_index: OnceCell<IdIndex>, change_id_index: OnceCell<ChangeIdIndex>,
view: View, view: View,
} }
@ -249,14 +249,11 @@ impl ReadonlyRepo {
}) })
} }
fn change_id_index(&self) -> &IdIndex { fn change_id_index(&self) -> &ChangeIdIndex {
self.change_id_index.get_or_init(|| { self.change_id_index.get_or_init(|| {
let heads = self.view().heads().iter().cloned().collect_vec(); let heads = self.view().heads().iter().cloned().collect_vec();
let walk = self.index().walk_revs(&heads, &[]); let walk = self.index().walk_revs(&heads, &[]);
IdIndex::from_vec( IdIndex::from_vec(walk.map(|entry| (entry.change_id(), ())).collect())
walk.map(|entry| (entry.change_id().to_bytes(), ()))
.collect(),
)
}) })
} }
@ -272,7 +269,7 @@ impl ReadonlyRepo {
self.index() self.index()
.shortest_unique_commit_id_prefix_len(root_commit_id), .shortest_unique_commit_id_prefix_len(root_commit_id),
self.change_id_index() self.change_id_index()
.shortest_unique_prefix_len(root_change_id.as_bytes()), .shortest_unique_prefix_len(root_change_id),
) )
} else { } else {
// For `len = index.shortest(id)`, a prefix of length `len` will disambiguate // For `len = index.shortest(id)`, a prefix of length `len` will disambiguate
@ -283,7 +280,7 @@ impl ReadonlyRepo {
self.index() self.index()
.shortest_unique_commit_id_prefix_len(&CommitId::from_bytes(target_id_bytes)), .shortest_unique_commit_id_prefix_len(&CommitId::from_bytes(target_id_bytes)),
self.change_id_index() self.change_id_index()
.shortest_unique_prefix_len(target_id_bytes), .shortest_unique_prefix_len(&ChangeId::from_bytes(target_id_bytes)),
) )
} }
} }
@ -1207,15 +1204,18 @@ mod dirty_cell {
} }
} }
// This value would be used to find divergent changes, for example, or if it is type ChangeIdIndex = IdIndex<ChangeId, ()>;
// necessary to mark whether an id is a Change or a Commit id.
type IdIndexValue = ();
#[derive(Debug, Clone)]
pub struct IdIndex(Vec<(Vec<u8>, IdIndexValue)>);
impl IdIndex { #[derive(Debug, Clone)]
/// Creates new index from the given keys. Keys may have duplicates. pub struct IdIndex<K, V>(Vec<(K, V)>);
pub fn from_vec(mut vec: Vec<(Vec<u8>, IdIndexValue)>) -> Self {
impl<K, V> IdIndex<K, V>
where
K: ObjectId + Ord,
{
/// Creates a new index from the given entries. Multiple values can be
/// associated with a single key.
pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1)); vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec) IdIndex(vec)
} }
@ -1223,8 +1223,7 @@ impl IdIndex {
/// This function returns the shortest length of a prefix of `key` that /// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index. /// disambiguates it from every other key in the index.
/// ///
/// The given `key` must be provided as bytes, not as ASCII hexadecimal /// The length to be returned is a number of hexadecimal digits.
/// digits. The length to be returned is a number of hexadecimal digits.
/// ///
/// This has some properties that we do not currently make much use of: /// This has some properties that we do not currently make much use of:
/// ///
@ -1235,12 +1234,14 @@ impl IdIndex {
/// order to disambiguate, you need every letter of the key *and* the /// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely /// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters. /// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &[u8]) -> usize { pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
let pos = self.0.partition_point(|(k, _)| k.as_slice() < key); let pos = self.0.partition_point(|(k, _)| k < key);
let left = pos.checked_sub(1).map(|p| &self.0[p]); let left = pos.checked_sub(1).map(|p| &self.0[p]);
let right = self.0[pos..].iter().find(|(k, _)| k.as_slice() != key); let right = self.0[pos..].iter().find(|(k, _)| k != key);
itertools::chain(left, right) itertools::chain(left, right)
.map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1) .map(|(neighbor, _value)| {
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max() .max()
.unwrap_or(0) .unwrap_or(0)
} }
@ -1253,53 +1254,53 @@ mod tests {
#[test] #[test]
fn test_id_index() { fn test_id_index() {
// No crash if empty // No crash if empty
let id_index = IdIndex::from_vec(vec![]); let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("00").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0 0
); );
let id_index = IdIndex::from_vec(vec![ let id_index = IdIndex::from_vec(vec![
(hex::decode("ab").unwrap(), ()), (ChangeId::from_hex("ab"), ()),
(hex::decode("acd0").unwrap(), ()), (ChangeId::from_hex("acd0"), ()),
(hex::decode("acd0").unwrap(), ()), // duplicated key is allowed (ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
]); ]);
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
2 2
); );
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("ac").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
3 3
); );
let id_index = IdIndex::from_vec(vec![ let id_index = IdIndex::from_vec(vec![
(hex::decode("ab").unwrap(), ()), (ChangeId::from_hex("ab"), ()),
(hex::decode("acd0").unwrap(), ()), (ChangeId::from_hex("acd0"), ()),
(hex::decode("acf0").unwrap(), ()), (ChangeId::from_hex("acf0"), ()),
(hex::decode("a0").unwrap(), ()), (ChangeId::from_hex("a0"), ()),
(hex::decode("ba").unwrap(), ()), (ChangeId::from_hex("ba"), ()),
]); ]);
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
2 2
); );
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("ba").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
1 1
); );
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("ab").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
2 2
); );
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
3 3
); );
// If it were there, the length would be 1. // If it were there, the length would be 1.
assert_eq!( assert_eq!(
id_index.shortest_unique_prefix_len(&hex::decode("c0").unwrap()), id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
1 1
); );
} }