prefixes: move IdIndex to id_prefix module

I'll reuse it there next.
This commit is contained in:
Martin von Zweigbergk 2023-05-05 16:11:42 -07:00 committed by Martin von Zweigbergk
parent 5612a3106c
commit f657bcb6ae
2 changed files with 192 additions and 185 deletions

View file

@ -21,11 +21,12 @@ use std::sync::Arc;
use itertools::Itertools;
use crate::backend::{ChangeId, CommitId, MillisSinceEpoch, ObjectId};
use crate::backend::{ChangeId, CommitId, MillisSinceEpoch};
use crate::default_index_store::{
CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition, RevWalk,
};
use crate::default_revset_graph_iterator::RevsetGraphIterator;
use crate::id_prefix::IdIndex;
use crate::index::{HexPrefix, Index, PrefixResolution};
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit};
use crate::repo_path::RepoPath;
@ -33,8 +34,8 @@ use crate::revset::{
ChangeIdIndex, ResolvedExpression, ResolvedPredicateExpression, Revset, RevsetEvaluationError,
RevsetFilterPredicate, RevsetGraphEdge, GENERATION_RANGE_FULL,
};
use crate::rewrite;
use crate::store::Store;
use crate::{backend, rewrite};
trait ToPredicateFn: fmt::Debug {
/// Creates function that tests if the given entry is included in the set.
@ -130,82 +131,6 @@ impl ChangeIdIndex for ChangeIdIndexImpl<'_> {
}
}
#[derive(Debug, Clone)]
struct IdIndex<K, V>(Vec<(K, V)>);
impl<K, V> IdIndex<K, V>
where
K: ObjectId + Ord,
{
/// Creates new index from the given entries. Multiple values can be
/// associated with a single key.
pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec)
}
/// Looks up entries with the given prefix, and collects values if matched
/// entries have unambiguous keys.
pub fn resolve_prefix_with<U>(
&self,
prefix: &HexPrefix,
mut value_mapper: impl FnMut(&V) -> U,
) -> PrefixResolution<Vec<U>> {
let mut range = self.resolve_prefix_range(prefix).peekable();
if let Some((first_key, _)) = range.peek().copied() {
let maybe_entries: Option<Vec<_>> = range
.map(|(k, v)| (k == first_key).then(|| value_mapper(v)))
.collect();
if let Some(entries) = maybe_entries {
PrefixResolution::SingleMatch(entries)
} else {
PrefixResolution::AmbiguousMatch
}
} else {
PrefixResolution::NoMatch
}
}
/// Iterates over entries with the given prefix.
pub fn resolve_prefix_range<'a: 'b, 'b>(
&'a self,
prefix: &'b HexPrefix,
) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
let min_bytes = prefix.min_prefix_bytes();
let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes);
self.0[pos..]
.iter()
.take_while(|(k, _)| prefix.matches(k))
.map(|(k, v)| (k, v))
}
/// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index.
///
/// The length to be returned is a number of hexadecimal digits.
///
/// This has some properties that we do not currently make much use of:
///
/// - The algorithm works even if `key` itself is not in the index.
///
/// - In the special case when there are keys in the trie for which our
/// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in
/// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
let pos = self.0.partition_point(|(k, _)| k < key);
let left = pos.checked_sub(1).map(|p| &self.0[p]);
let right = self.0[pos..].iter().find(|(k, _)| k != key);
itertools::chain(left, right)
.map(|(neighbor, _value)| {
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max()
.unwrap_or(0)
}
}
#[derive(Debug)]
struct EagerRevset<'index> {
index_entries: Vec<IndexEntry<'index>>,
@ -954,112 +879,6 @@ mod tests {
use crate::backend::{ChangeId, CommitId, ObjectId};
use crate::default_index_store::MutableIndexImpl;
#[test]
fn test_id_index_resolve_prefix() {
fn sorted(resolution: PrefixResolution<Vec<i32>>) -> PrefixResolution<Vec<i32>> {
match resolution {
PrefixResolution::SingleMatch(mut xs) => {
xs.sort(); // order of values might not be preserved by IdIndex
PrefixResolution::SingleMatch(xs)
}
_ => resolution,
}
}
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("0000"), 0),
(ChangeId::from_hex("0099"), 1),
(ChangeId::from_hex("0099"), 2),
(ChangeId::from_hex("0aaa"), 3),
(ChangeId::from_hex("0aab"), 4),
]);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![0]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
assert_eq!(
sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)),
PrefixResolution::SingleMatch(vec![1, 2]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![4]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
}
#[test]
fn test_id_index_shortest_unique_prefix_len() {
// No crash if empty
let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
3
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acf0"), ()),
(ChangeId::from_hex("a0"), ()),
(ChangeId::from_hex("ba"), ()),
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
1
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
3
);
// If it were there, the length would be 1.
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
1
);
}
/// Generator of unique 16-byte ChangeId excluding root id
fn change_id_generator() -> impl FnMut() -> ChangeId {
let mut iter = (1_u128..).map(|n| ChangeId::new(n.to_le_bytes().into()));

View file

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::backend::{ChangeId, CommitId};
use crate::backend::{self, ChangeId, CommitId, ObjectId};
use crate::index::{HexPrefix, PrefixResolution};
use crate::repo::Repo;
@ -49,3 +49,191 @@ impl IdPrefixContext<'_> {
self.repo.shortest_unique_change_id_prefix_len(change_id)
}
}
#[derive(Debug, Clone)]
pub struct IdIndex<K, V>(Vec<(K, V)>);
impl<K, V> IdIndex<K, V>
where
K: ObjectId + Ord,
{
/// Creates new index from the given entries. Multiple values can be
/// associated with a single key.
pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
IdIndex(vec)
}
/// Looks up entries with the given prefix, and collects values if matched
/// entries have unambiguous keys.
pub fn resolve_prefix_with<U>(
&self,
prefix: &HexPrefix,
mut value_mapper: impl FnMut(&V) -> U,
) -> PrefixResolution<Vec<U>> {
let mut range = self.resolve_prefix_range(prefix).peekable();
if let Some((first_key, _)) = range.peek().copied() {
let maybe_entries: Option<Vec<_>> = range
.map(|(k, v)| (k == first_key).then(|| value_mapper(v)))
.collect();
if let Some(entries) = maybe_entries {
PrefixResolution::SingleMatch(entries)
} else {
PrefixResolution::AmbiguousMatch
}
} else {
PrefixResolution::NoMatch
}
}
/// Iterates over entries with the given prefix.
pub fn resolve_prefix_range<'a: 'b, 'b>(
&'a self,
prefix: &'b HexPrefix,
) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
let min_bytes = prefix.min_prefix_bytes();
let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes);
self.0[pos..]
.iter()
.take_while(|(k, _)| prefix.matches(k))
.map(|(k, v)| (k, v))
}
/// This function returns the shortest length of a prefix of `key` that
/// disambiguates it from every other key in the index.
///
/// The length to be returned is a number of hexadecimal digits.
///
/// This has some properties that we do not currently make much use of:
///
/// - The algorithm works even if `key` itself is not in the index.
///
/// - In the special case when there are keys in the trie for which our
/// `key` is an exact prefix, returns `key.len() + 1`. Conceptually, in
/// order to disambiguate, you need every letter of the key *and* the
/// additional fact that it's the entire key). This case is extremely
/// unlikely for hashes with 12+ hexadecimal characters.
pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
let pos = self.0.partition_point(|(k, _)| k < key);
let left = pos.checked_sub(1).map(|p| &self.0[p]);
let right = self.0[pos..].iter().find(|(k, _)| k != key);
itertools::chain(left, right)
.map(|(neighbor, _value)| {
backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
})
.max()
.unwrap_or(0)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::backend::{ChangeId, ObjectId};
#[test]
fn test_id_index_resolve_prefix() {
fn sorted(resolution: PrefixResolution<Vec<i32>>) -> PrefixResolution<Vec<i32>> {
match resolution {
PrefixResolution::SingleMatch(mut xs) => {
xs.sort(); // order of values might not be preserved by IdIndex
PrefixResolution::SingleMatch(xs)
}
_ => resolution,
}
}
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("0000"), 0),
(ChangeId::from_hex("0099"), 1),
(ChangeId::from_hex("0099"), 2),
(ChangeId::from_hex("0aaa"), 3),
(ChangeId::from_hex("0aab"), 4),
]);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("00").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("000").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![0]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0001").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
assert_eq!(
sorted(id_index.resolve_prefix_with(&HexPrefix::new("009").unwrap(), |&v| v)),
PrefixResolution::SingleMatch(vec![1, 2]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aa").unwrap(), |&v| v),
PrefixResolution::AmbiguousMatch,
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("0aab").unwrap(), |&v| v),
PrefixResolution::SingleMatch(vec![4]),
);
assert_eq!(
id_index.resolve_prefix_with(&HexPrefix::new("f").unwrap(), |&v| v),
PrefixResolution::NoMatch,
);
}
#[test]
fn test_id_index_shortest_unique_prefix_len() {
// No crash if empty
let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
0
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
3
);
let id_index = IdIndex::from_vec(vec![
(ChangeId::from_hex("ab"), ()),
(ChangeId::from_hex("acd0"), ()),
(ChangeId::from_hex("acf0"), ()),
(ChangeId::from_hex("a0"), ()),
(ChangeId::from_hex("ba"), ()),
]);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
1
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
2
);
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
3
);
// If it were there, the length would be 1.
assert_eq!(
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
1
);
}
}