mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-02 18:01:05 +00:00
id_prefix: only store first few bytes of keys in IdIndex
This eliminates indirect access through Vec<u8> and improves cache locality while sorting the index entries. We could achieve a similar result by using SmallVec<[u8; 24]> in place of Commit/ChangeId(Vec<u8>), but we would have to determine a reasonable id length across backends. Indexing [u8; 4] performs better, at the cost of API and implementation complexity. For temporary Commit/ChangeId allocation in general, I think a borrowed type like Path/PathBuf will help. Testing with my "linux" repo, this saves ~670ms needed to initialize both the change id index and the disambiguation indexes.
This commit is contained in:
parent
b789ffb3fc
commit
ca6b9828d1
2 changed files with 376 additions and 106 deletions
|
@ -26,7 +26,7 @@ use crate::default_index_store::{
|
|||
CompositeIndex, IndexEntry, IndexEntryByPosition, IndexPosition, RevWalk,
|
||||
};
|
||||
use crate::default_revset_graph_iterator::RevsetGraphIterator;
|
||||
use crate::id_prefix::IdIndex;
|
||||
use crate::id_prefix::{IdIndex, IdIndexSource, IdIndexSourceEntry};
|
||||
use crate::index::{HexPrefix, Index, PrefixResolution};
|
||||
use crate::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit};
|
||||
use crate::repo_path::RepoPath;
|
||||
|
@ -109,7 +109,7 @@ impl<'index> Revset<'index> for RevsetImpl<'index> {
|
|||
// TODO: Create a persistent lookup from change id to commit ids.
|
||||
let mut pos_by_change = IdIndex::builder();
|
||||
for entry in self.inner.iter() {
|
||||
pos_by_change.insert(entry.change_id(), entry.position());
|
||||
pos_by_change.insert(&entry.change_id(), entry.position());
|
||||
}
|
||||
Box::new(ChangeIdIndexImpl {
|
||||
index: self.index,
|
||||
|
@ -128,18 +128,33 @@ impl<'index> Revset<'index> for RevsetImpl<'index> {
|
|||
|
||||
struct ChangeIdIndexImpl<'index> {
|
||||
index: CompositeIndex<'index>,
|
||||
pos_by_change: IdIndex<ChangeId, IndexPosition>,
|
||||
pos_by_change: IdIndex<ChangeId, IndexPosition, 4>,
|
||||
}
|
||||
|
||||
impl ChangeIdIndex for ChangeIdIndexImpl<'_> {
|
||||
fn resolve_prefix(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<CommitId>> {
|
||||
self.pos_by_change
|
||||
.resolve_prefix_with(prefix, |pos| self.index.entry_by_pos(*pos).commit_id())
|
||||
.resolve_prefix_with(self.index, prefix, |entry| entry.commit_id())
|
||||
.map(|(_, commit_ids)| commit_ids)
|
||||
}
|
||||
|
||||
fn shortest_unique_prefix_len(&self, change_id: &ChangeId) -> usize {
|
||||
self.pos_by_change.shortest_unique_prefix_len(change_id)
|
||||
self.pos_by_change
|
||||
.shortest_unique_prefix_len(self.index, change_id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'index> IdIndexSource<IndexPosition> for CompositeIndex<'index> {
|
||||
type Entry = IndexEntry<'index>;
|
||||
|
||||
fn entry_at(&self, pointer: &IndexPosition) -> Self::Entry {
|
||||
self.entry_by_pos(*pointer)
|
||||
}
|
||||
}
|
||||
|
||||
// An index entry acts as the `(key, value)` pair of a change-id index: its
// key is the entry's change id.
impl IdIndexSourceEntry<ChangeId> for IndexEntry<'_> {
    /// Returns the change id of this entry as the full-length lookup key.
    fn to_key(&self) -> ChangeId {
        self.change_id()
    }
}
|
||||
|
||||
|
|
|
@ -12,6 +12,8 @@
|
|||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
use std::iter;
|
||||
use std::marker::PhantomData;
|
||||
use std::rc::Rc;
|
||||
|
||||
use itertools::Itertools as _;
|
||||
|
@ -33,8 +35,8 @@ struct DisambiguationData {
|
|||
|
||||
struct Indexes {
|
||||
commit_change_ids: Vec<(CommitId, ChangeId)>,
|
||||
commit_index: IdIndex<CommitId, u32>,
|
||||
change_index: IdIndex<ChangeId, u32>,
|
||||
commit_index: IdIndex<CommitId, u32, 4>,
|
||||
change_index: IdIndex<ChangeId, u32, 4>,
|
||||
}
|
||||
|
||||
impl DisambiguationData {
|
||||
|
@ -55,8 +57,8 @@ impl DisambiguationData {
|
|||
let mut change_index = IdIndex::with_capacity(commit_change_ids.len());
|
||||
for (i, (commit_id, change_id)) in commit_change_ids.iter().enumerate() {
|
||||
let i: u32 = i.try_into().unwrap();
|
||||
commit_index.insert(commit_id.clone(), i);
|
||||
change_index.insert(change_id.clone(), i);
|
||||
commit_index.insert(commit_id, i);
|
||||
change_index.insert(change_id, i);
|
||||
}
|
||||
Ok(Indexes {
|
||||
commit_change_ids,
|
||||
|
@ -67,6 +69,28 @@ impl DisambiguationData {
|
|||
}
|
||||
}
|
||||
|
||||
// The `(CommitId, ChangeId)` table is the source for indexes whose pointers
// are `u32` offsets into that table.
impl<'a> IdIndexSource<u32> for &'a [(CommitId, ChangeId)] {
    type Entry = &'a (CommitId, ChangeId);

    /// Returns a reference to the pair stored at offset `pointer`.
    ///
    /// # Panics
    ///
    /// Panics if `pointer` is out of bounds for the slice.
    fn entry_at(&self, pointer: &u32) -> Self::Entry {
        &self[*pointer as usize]
    }
}
|
||||
|
||||
// Viewed through a commit-id index, the key of a `(CommitId, ChangeId)` pair
// is its first element.
impl IdIndexSourceEntry<CommitId> for &'_ (CommitId, ChangeId) {
    /// Returns a clone of the commit id held by this pair.
    fn to_key(&self) -> CommitId {
        let (commit_id, _) = self;
        commit_id.clone()
    }
}
|
||||
|
||||
// Viewed through a change-id index, the key of a `(CommitId, ChangeId)` pair
// is its second element.
impl IdIndexSourceEntry<ChangeId> for &'_ (CommitId, ChangeId) {
    /// Returns a clone of the change id held by this pair.
    fn to_key(&self) -> ChangeId {
        let (_, change_id) = self;
        change_id.clone()
    }
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct IdPrefixContext {
|
||||
disambiguation: Option<DisambiguationData>,
|
||||
|
@ -100,9 +124,11 @@ impl IdPrefixContext {
|
|||
prefix: &HexPrefix,
|
||||
) -> PrefixResolution<CommitId> {
|
||||
if let Some(indexes) = self.disambiguation_indexes(repo) {
|
||||
let resolution = indexes.commit_index.resolve_prefix_to_key(prefix);
|
||||
let resolution = indexes
|
||||
.commit_index
|
||||
.resolve_prefix_to_key(&*indexes.commit_change_ids, prefix);
|
||||
if let PrefixResolution::SingleMatch(id) = resolution {
|
||||
return PrefixResolution::SingleMatch(id.clone());
|
||||
return PrefixResolution::SingleMatch(id);
|
||||
}
|
||||
}
|
||||
repo.index().resolve_prefix(prefix)
|
||||
|
@ -112,7 +138,10 @@ impl IdPrefixContext {
|
|||
/// can still be resolved by `resolve_commit_prefix()`.
|
||||
pub fn shortest_commit_prefix_len(&self, repo: &dyn Repo, commit_id: &CommitId) -> usize {
|
||||
if let Some(indexes) = self.disambiguation_indexes(repo) {
|
||||
if let Some(lookup) = indexes.commit_index.lookup_exact(commit_id) {
|
||||
if let Some(lookup) = indexes
|
||||
.commit_index
|
||||
.lookup_exact(&*indexes.commit_change_ids, commit_id)
|
||||
{
|
||||
return lookup.shortest_unique_prefix_len();
|
||||
}
|
||||
}
|
||||
|
@ -126,9 +155,11 @@ impl IdPrefixContext {
|
|||
prefix: &HexPrefix,
|
||||
) -> PrefixResolution<Vec<CommitId>> {
|
||||
if let Some(indexes) = self.disambiguation_indexes(repo) {
|
||||
let resolution = indexes
|
||||
.change_index
|
||||
.resolve_prefix_with(prefix, |&i| indexes.commit_change_ids[i as usize].0.clone());
|
||||
let resolution = indexes.change_index.resolve_prefix_with(
|
||||
&*indexes.commit_change_ids,
|
||||
prefix,
|
||||
|(commit_id, _)| commit_id.clone(),
|
||||
);
|
||||
if let PrefixResolution::SingleMatch((_, ids)) = resolution {
|
||||
return PrefixResolution::SingleMatch(ids);
|
||||
}
|
||||
|
@ -140,7 +171,10 @@ impl IdPrefixContext {
|
|||
/// can still be resolved by `resolve_change_prefix()`.
|
||||
pub fn shortest_change_prefix_len(&self, repo: &dyn Repo, change_id: &ChangeId) -> usize {
|
||||
if let Some(indexes) = self.disambiguation_indexes(repo) {
|
||||
if let Some(lookup) = indexes.change_index.lookup_exact(change_id) {
|
||||
if let Some(lookup) = indexes
|
||||
.change_index
|
||||
.lookup_exact(&*indexes.commit_change_ids, change_id)
|
||||
{
|
||||
return lookup.shortest_unique_prefix_len();
|
||||
}
|
||||
}
|
||||
|
@ -148,69 +182,107 @@ impl IdPrefixContext {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct IdIndex<K, V>(Vec<(K, V)>);
|
||||
|
||||
/// In-memory immutable index to do prefix lookup of key `K` through `P`.
|
||||
///
|
||||
/// In a nutshell, this is a mapping of `K` -> `P` -> `S::Entry` where `S:
|
||||
/// IdIndexSource<P>`. The source table `S` isn't owned by this index.
|
||||
///
|
||||
/// This index stores first `N` bytes of each key `K` associated with the
|
||||
/// pointer `P`. `K` may be a heap-allocated object. `P` is supposed to be
|
||||
/// a cheap value type like `u32` or `usize`. As the index entry of type
|
||||
/// `([u8; N], P)` is small and has no indirect reference, constructing
|
||||
/// the index should be faster than sorting the source `(K, _)` pairs.
|
||||
///
|
||||
/// A key `K` must be at least `N` bytes long.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IdIndexBuilder<K, V> {
|
||||
unsorted_index: Vec<(K, V)>,
|
||||
pub struct IdIndex<K, P, const N: usize> {
|
||||
// Maybe better to build separate (keys, values) vectors, but there's no std function
|
||||
// to co-sort them.
|
||||
index: Vec<([u8; N], P)>,
|
||||
// Let's pretend [u8; N] above were of type K. It helps type inference, and ensures that
|
||||
// IdIndexSource has the same key type.
|
||||
phantom_key: PhantomData<K>,
|
||||
}
|
||||
|
||||
impl<K, V> IdIndexBuilder<K, V>
|
||||
/// Source table for `IdIndex` to map pointer of type `P` to entry.
///
/// Implementors resolve a pointer value to the entry it designates; the
/// entry type is chosen by the implementor through [`Self::Entry`].
pub trait IdIndexSource<P> {
    /// Entry type returned for a pointer.
    type Entry;

    /// Returns the entry that `pointer` refers to.
    fn entry_at(&self, pointer: &P) -> Self::Entry;
}
|
||||
|
||||
/// Source table entry of `IdIndex`, which is conceptually a `(key, value)`
/// pair.
pub trait IdIndexSourceEntry<K> {
    /// Returns the key of type `K` associated with this entry.
    fn to_key(&self) -> K;
}
|
||||
|
||||
/// Accumulates `(short key, pointer)` pairs before they are turned into an
/// index.
///
/// `K` is the full key type; only an `N`-byte array per key is stored here.
#[derive(Clone, Debug)]
pub struct IdIndexBuilder<K, P, const N: usize> {
    /// Entries in insertion order, each an `N`-byte short key with its pointer.
    unsorted_index: Vec<([u8; N], P)>,
    /// Records the key type `K` without storing any value of it.
    phantom_key: PhantomData<K>,
}
|
||||
|
||||
impl<K, P, const N: usize> IdIndexBuilder<K, P, N>
|
||||
where
|
||||
K: ObjectId + Ord,
|
||||
{
|
||||
/// Inserts new entry. Multiple values can be associated with a single key.
|
||||
pub fn insert(&mut self, key: K, value: V) {
|
||||
self.unsorted_index.push((key, value));
|
||||
pub fn insert(&mut self, key: &K, pointer: P) {
|
||||
let short_key = unwrap_as_short_key(key.as_bytes());
|
||||
self.unsorted_index.push((*short_key, pointer));
|
||||
}
|
||||
|
||||
pub fn build(self) -> IdIndex<K, V> {
|
||||
pub fn build(self) -> IdIndex<K, P, N> {
|
||||
let mut index = self.unsorted_index;
|
||||
index.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
|
||||
IdIndex(index)
|
||||
index.sort_unstable_by_key(|(s, _)| *s);
|
||||
let phantom_key = self.phantom_key;
|
||||
IdIndex { index, phantom_key }
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> IdIndex<K, V>
|
||||
impl<K, P, const N: usize> IdIndex<K, P, N>
|
||||
where
|
||||
K: ObjectId + Ord,
|
||||
{
|
||||
pub fn builder() -> IdIndexBuilder<K, V> {
|
||||
pub fn builder() -> IdIndexBuilder<K, P, N> {
|
||||
IdIndexBuilder {
|
||||
unsorted_index: Vec::new(),
|
||||
phantom_key: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_capacity(capacity: usize) -> IdIndexBuilder<K, V> {
|
||||
pub fn with_capacity(capacity: usize) -> IdIndexBuilder<K, P, N> {
|
||||
IdIndexBuilder {
|
||||
unsorted_index: Vec::with_capacity(capacity),
|
||||
phantom_key: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up entries with the given prefix, and collects values if matched
|
||||
/// entries have unambiguous keys.
|
||||
pub fn resolve_prefix_with<'a, B, U>(
|
||||
&'a self,
|
||||
pub fn resolve_prefix_with<B, S, U>(
|
||||
&self,
|
||||
source: S,
|
||||
prefix: &HexPrefix,
|
||||
value_mapper: impl FnMut(&'a V) -> U,
|
||||
) -> PrefixResolution<(&'a K, B)>
|
||||
entry_mapper: impl FnMut(S::Entry) -> U,
|
||||
) -> PrefixResolution<(K, B)>
|
||||
where
|
||||
B: FromIterator<U>,
|
||||
S: IdIndexSource<P>,
|
||||
S::Entry: IdIndexSourceEntry<K>,
|
||||
{
|
||||
fn collect<'a, B, K, V, U>(
|
||||
range: impl Iterator<Item = (&'a K, &'a V)>,
|
||||
mut value_mapper: impl FnMut(&'a V) -> U,
|
||||
) -> PrefixResolution<(&'a K, B)>
|
||||
fn collect<B, K, E, U>(
|
||||
mut range: impl Iterator<Item = (K, E)>,
|
||||
mut entry_mapper: impl FnMut(E) -> U,
|
||||
) -> PrefixResolution<(K, B)>
|
||||
where
|
||||
B: FromIterator<U>,
|
||||
K: Eq + 'a,
|
||||
V: 'a,
|
||||
K: Eq,
|
||||
{
|
||||
let mut range = range.peekable();
|
||||
if let Some((first_key, _)) = range.peek().copied() {
|
||||
let maybe_values: Option<B> = range
|
||||
.map(|(k, v)| (k == first_key).then(|| value_mapper(v)))
|
||||
if let Some((first_key, first_entry)) = range.next() {
|
||||
let maybe_values: Option<B> = iter::once(Some(entry_mapper(first_entry)))
|
||||
.chain(range.map(|(k, e)| (k == first_key).then(|| entry_mapper(e))))
|
||||
.collect();
|
||||
if let Some(values) = maybe_values {
|
||||
PrefixResolution::SingleMatch((first_key, values))
|
||||
|
@ -228,30 +300,71 @@ where
|
|||
return PrefixResolution::AmbiguousMatch;
|
||||
}
|
||||
|
||||
let pos = self.0.partition_point(|(k, _)| k.as_bytes() < min_bytes);
|
||||
let range = self.0[pos..]
|
||||
.iter()
|
||||
.take_while(|(k, _)| prefix.matches(k))
|
||||
.map(|(k, v)| (k, v));
|
||||
collect(range, value_mapper)
|
||||
let to_key_entry_pair = |(_, pointer): &(_, P)| -> (K, S::Entry) {
|
||||
let entry = source.entry_at(pointer);
|
||||
(entry.to_key(), entry)
|
||||
};
|
||||
if min_bytes.len() > N {
|
||||
// If the min prefix (including odd byte) is longer than the stored short keys,
|
||||
// we are sure that min_bytes[..N] does not include the odd byte. Use it to
|
||||
// take contiguous range, then filter by (longer) prefix.matches().
|
||||
let short_bytes = unwrap_as_short_key(min_bytes);
|
||||
let pos = self.index.partition_point(|(s, _)| s < short_bytes);
|
||||
let range = self.index[pos..]
|
||||
.iter()
|
||||
.take_while(|(s, _)| s == short_bytes)
|
||||
.map(to_key_entry_pair)
|
||||
.filter(|(k, _)| prefix.matches(k));
|
||||
collect(range, entry_mapper)
|
||||
} else {
|
||||
// Otherwise, use prefix.matches() to deal with odd byte. Since the prefix is
|
||||
// covered by short key width, we're sure that the matching prefixes are sorted.
|
||||
let pos = self.index.partition_point(|(s, _)| &s[..] < min_bytes);
|
||||
let range = self.index[pos..]
|
||||
.iter()
|
||||
.map(to_key_entry_pair)
|
||||
.take_while(|(k, _)| prefix.matches(k));
|
||||
collect(range, entry_mapper)
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks up unambiguous key with the given prefix.
|
||||
pub fn resolve_prefix_to_key<'a>(&'a self, prefix: &HexPrefix) -> PrefixResolution<&'a K> {
|
||||
self.resolve_prefix_with(prefix, |_| ())
|
||||
pub fn resolve_prefix_to_key<S>(&self, source: S, prefix: &HexPrefix) -> PrefixResolution<K>
|
||||
where
|
||||
S: IdIndexSource<P>,
|
||||
S::Entry: IdIndexSourceEntry<K>,
|
||||
{
|
||||
self.resolve_prefix_with(source, prefix, |_| ())
|
||||
.map(|(key, ())| key)
|
||||
}
|
||||
|
||||
/// Looks up entry for the key. Returns accessor to neighbors.
|
||||
pub fn lookup_exact<'i, 'q>(&'i self, key: &'q K) -> Option<IdIndexLookup<'i, 'q, K, V>> {
|
||||
let lookup = self.lookup_some(key);
|
||||
pub fn lookup_exact<'i, 'q, S>(
|
||||
&'i self,
|
||||
source: S,
|
||||
key: &'q K,
|
||||
) -> Option<IdIndexLookup<'i, 'q, K, P, S, N>>
|
||||
where
|
||||
S: IdIndexSource<P>,
|
||||
S::Entry: IdIndexSourceEntry<K>,
|
||||
{
|
||||
let lookup = self.lookup_some(source, key);
|
||||
lookup.has_key().then_some(lookup)
|
||||
}
|
||||
|
||||
fn lookup_some<'i, 'q>(&'i self, key: &'q K) -> IdIndexLookup<'i, 'q, K, V> {
|
||||
let index = &self.0;
|
||||
let pos = index.partition_point(|(k, _)| k < key);
|
||||
IdIndexLookup { index, key, pos }
|
||||
fn lookup_some<'i, 'q, S>(&'i self, source: S, key: &'q K) -> IdIndexLookup<'i, 'q, K, P, S, N>
|
||||
where
|
||||
S: IdIndexSource<P>,
|
||||
{
|
||||
let short_key = unwrap_as_short_key(key.as_bytes());
|
||||
let index = &self.index;
|
||||
let pos = index.partition_point(|(s, _)| s < short_key);
|
||||
IdIndexLookup {
|
||||
index,
|
||||
source,
|
||||
key,
|
||||
pos,
|
||||
}
|
||||
}
|
||||
|
||||
/// This function returns the shortest length of a prefix of `key` that
|
||||
|
@ -268,72 +381,119 @@ where
|
|||
/// order to disambiguate, you need every letter of the key *and* the
|
||||
/// additional fact that it's the entire key). This case is extremely
|
||||
/// unlikely for hashes with 12+ hexadecimal characters.
|
||||
pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
|
||||
self.lookup_some(key).shortest_unique_prefix_len()
|
||||
pub fn shortest_unique_prefix_len<S>(&self, source: S, key: &K) -> usize
|
||||
where
|
||||
S: IdIndexSource<P>,
|
||||
S::Entry: IdIndexSourceEntry<K>,
|
||||
{
|
||||
self.lookup_some(source, key).shortest_unique_prefix_len()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct IdIndexLookup<'i, 'q, K, V> {
|
||||
index: &'i Vec<(K, V)>,
|
||||
pub struct IdIndexLookup<'i, 'q, K, P, S, const N: usize> {
|
||||
index: &'i Vec<([u8; N], P)>,
|
||||
source: S,
|
||||
key: &'q K,
|
||||
pos: usize, // may be index.len()
|
||||
}
|
||||
|
||||
impl<'i, 'q, K, V> IdIndexLookup<'i, 'q, K, V>
|
||||
impl<'i, 'q, K, P, S, const N: usize> IdIndexLookup<'i, 'q, K, P, S, N>
|
||||
where
|
||||
K: ObjectId + Eq,
|
||||
S: IdIndexSource<P>,
|
||||
S::Entry: IdIndexSourceEntry<K>,
|
||||
{
|
||||
fn has_key(&self) -> bool {
|
||||
let short_key = unwrap_as_short_key(self.key.as_bytes());
|
||||
self.index[self.pos..]
|
||||
.iter()
|
||||
.take_while(|(k, _)| k == self.key)
|
||||
.next()
|
||||
.is_some()
|
||||
.take_while(|(s, _)| s == short_key)
|
||||
.any(|(_, p)| self.source.entry_at(p).to_key() == *self.key)
|
||||
}
|
||||
|
||||
pub fn shortest_unique_prefix_len(&self) -> usize {
|
||||
// Since entries having the same short key aren't sorted by the full-length key,
|
||||
// we need to scan all entries in the current chunk, plus left/right neighbors.
|
||||
// Typically, current.len() is 1.
|
||||
let short_key = unwrap_as_short_key(self.key.as_bytes());
|
||||
let left = self.pos.checked_sub(1).map(|p| &self.index[p]);
|
||||
let right = self.index[self.pos..].iter().find(|(k, _)| k != self.key);
|
||||
itertools::chain(left, right)
|
||||
.map(|(neighbor, _value)| {
|
||||
backend::common_hex_len(self.key.as_bytes(), neighbor.as_bytes()) + 1
|
||||
})
|
||||
.max()
|
||||
// Even if the key is the only one in the index, we require at least one digit.
|
||||
.unwrap_or(1)
|
||||
let (current, right) = {
|
||||
let range = &self.index[self.pos..];
|
||||
let count = range.iter().take_while(|(s, _)| s == short_key).count();
|
||||
(&range[..count], range.get(count))
|
||||
};
|
||||
|
||||
// Left/right neighbors should have unique short keys. For the current chunk,
|
||||
// we need to look up full-length keys.
|
||||
let unique_len = |a: &[u8], b: &[u8]| backend::common_hex_len(a, b) + 1;
|
||||
let neighbor_lens = left
|
||||
.iter()
|
||||
.chain(&right)
|
||||
.map(|(s, _)| unique_len(s, short_key));
|
||||
let current_lens = current
|
||||
.iter()
|
||||
.map(|(_, p)| self.source.entry_at(p).to_key())
|
||||
.filter(|key| key != self.key)
|
||||
.map(|key| unique_len(key.as_bytes(), self.key.as_bytes()));
|
||||
// Even if the key is the only one in the index, we require at least one digit.
|
||||
neighbor_lens.chain(current_lens).max().unwrap_or(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the first `N` bytes of `key_bytes` as a fixed-size array reference.
///
/// # Panics
///
/// Panics with "key too short" if `key_bytes` holds fewer than `N` bytes.
fn unwrap_as_short_key<const N: usize>(key_bytes: &[u8]) -> &[u8; N] {
    let short_slice = key_bytes.get(..N).expect("key too short");
    // The slice above is exactly `N` bytes long, so the conversion cannot fail.
    short_slice
        .try_into()
        .expect("slice of length N converts to [u8; N]")
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::backend::{ChangeId, ObjectId};
|
||||
|
||||
fn build_id_index<K, V>(entries: &[(K, V)]) -> IdIndex<K, V>
|
||||
where
|
||||
K: ObjectId + Ord + Clone,
|
||||
V: Clone,
|
||||
{
|
||||
/// Test-only pointer type: wraps a `usize` offset.
#[derive(Clone, Copy, Eq, PartialEq)]
struct Position(usize);
|
||||
|
||||
// Test source table: a slice of `(ChangeId, V)` pairs addressed by `Position`.
impl<'a, V> IdIndexSource<Position> for &'a [(ChangeId, V)] {
    type Entry = &'a (ChangeId, V);

    /// Returns a reference to the pair at offset `pointer.0`.
    ///
    /// # Panics
    ///
    /// Panics if the offset is out of bounds for the slice.
    fn entry_at(&self, pointer: &Position) -> Self::Entry {
        &self[pointer.0]
    }
}
|
||||
|
||||
// The key of a test `(ChangeId, V)` pair is its first element.
impl<V> IdIndexSourceEntry<ChangeId> for &'_ (ChangeId, V) {
    /// Returns a clone of the change id held by this pair.
    fn to_key(&self) -> ChangeId {
        let (change_id, _) = self;
        change_id.clone()
    }
}
|
||||
|
||||
fn build_id_index<V, const N: usize>(
|
||||
entries: &[(ChangeId, V)],
|
||||
) -> IdIndex<ChangeId, Position, N> {
|
||||
let mut builder = IdIndex::with_capacity(entries.len());
|
||||
for (k, v) in entries {
|
||||
builder.insert(k.clone(), v.clone());
|
||||
for (i, (k, _)) in entries.iter().enumerate() {
|
||||
builder.insert(k, Position(i));
|
||||
}
|
||||
builder.build()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_id_index_resolve_prefix() {
|
||||
let id_index = build_id_index(&[
|
||||
let source = vec![
|
||||
(ChangeId::from_hex("0000"), 0),
|
||||
(ChangeId::from_hex("0099"), 1),
|
||||
(ChangeId::from_hex("0099"), 2),
|
||||
(ChangeId::from_hex("0aaa"), 3),
|
||||
(ChangeId::from_hex("0aab"), 4),
|
||||
]);
|
||||
];
|
||||
|
||||
// short_key.len() == full_key.len()
|
||||
let id_index = build_id_index::<_, 2>(&source);
|
||||
let resolve_prefix = |prefix: &HexPrefix| {
|
||||
let resolution: PrefixResolution<(_, Vec<_>)> =
|
||||
id_index.resolve_prefix_with(prefix, |&v| v);
|
||||
id_index.resolve_prefix_with(&*source, prefix, |(_, v)| *v);
|
||||
resolution.map(|(key, mut values)| {
|
||||
values.sort(); // order of values might not be preserved by IdIndex
|
||||
(key, values)
|
||||
|
@ -349,7 +509,7 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("000").unwrap()),
|
||||
PrefixResolution::SingleMatch((&ChangeId::from_hex("0000"), vec![0])),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0000"), vec![0])),
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0001").unwrap()),
|
||||
|
@ -357,7 +517,7 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("009").unwrap()),
|
||||
PrefixResolution::SingleMatch((&ChangeId::from_hex("0099"), vec![1, 2])),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0099"), vec![1, 2])),
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0aa").unwrap()),
|
||||
|
@ -365,76 +525,171 @@ mod tests {
|
|||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0aab").unwrap()),
|
||||
PrefixResolution::SingleMatch((&ChangeId::from_hex("0aab"), vec![4])),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0aab"), vec![4])),
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("f").unwrap()),
|
||||
PrefixResolution::NoMatch,
|
||||
);
|
||||
|
||||
// short_key.len() < full_key.len()
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
let resolve_prefix = |prefix: &HexPrefix| {
|
||||
let resolution: PrefixResolution<(_, Vec<_>)> =
|
||||
id_index.resolve_prefix_with(&*source, prefix, |(_, v)| *v);
|
||||
resolution.map(|(key, mut values)| {
|
||||
values.sort(); // order of values might not be preserved by IdIndex
|
||||
(key, values)
|
||||
})
|
||||
};
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("00").unwrap()),
|
||||
PrefixResolution::AmbiguousMatch,
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("000").unwrap()),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0000"), vec![0])),
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0001").unwrap()),
|
||||
PrefixResolution::NoMatch,
|
||||
);
|
||||
// For short key "00", ["0000", "0099", "0099"] would match. We shouldn't
|
||||
// break at "009".matches("0000").
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("009").unwrap()),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0099"), vec![1, 2])),
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0a").unwrap()),
|
||||
PrefixResolution::AmbiguousMatch,
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0aa").unwrap()),
|
||||
PrefixResolution::AmbiguousMatch,
|
||||
);
|
||||
assert_eq!(
|
||||
resolve_prefix(&HexPrefix::new("0aab").unwrap()),
|
||||
PrefixResolution::SingleMatch((ChangeId::from_hex("0aab"), vec![4])),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_lookup_exact() {
|
||||
// No crash if empty
|
||||
let id_index = build_id_index::<ChangeId, ()>(&[]);
|
||||
assert!(id_index.lookup_exact(&ChangeId::from_hex("00")).is_none());
|
||||
let source: Vec<(ChangeId, ())> = vec![];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("00"))
|
||||
.is_none());
|
||||
|
||||
let id_index = build_id_index(&[(ChangeId::from_hex("ab"), ())]);
|
||||
assert!(id_index.lookup_exact(&ChangeId::from_hex("aa")).is_none());
|
||||
assert!(id_index.lookup_exact(&ChangeId::from_hex("ab")).is_some());
|
||||
assert!(id_index.lookup_exact(&ChangeId::from_hex("ac")).is_none());
|
||||
let source = vec![
|
||||
(ChangeId::from_hex("ab00"), ()),
|
||||
(ChangeId::from_hex("ab01"), ()),
|
||||
];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("aa00"))
|
||||
.is_none());
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("ab00"))
|
||||
.is_some());
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("ab01"))
|
||||
.is_some());
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("ab02"))
|
||||
.is_none());
|
||||
assert!(id_index
|
||||
.lookup_exact(&*source, &ChangeId::from_hex("ac00"))
|
||||
.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_id_index_shortest_unique_prefix_len() {
|
||||
// No crash if empty
|
||||
let id_index = build_id_index::<ChangeId, ()>(&[]);
|
||||
let source: Vec<(ChangeId, ())> = vec![];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("00")),
|
||||
1
|
||||
);
|
||||
|
||||
let id_index = build_id_index(&[
|
||||
let source = vec![
|
||||
(ChangeId::from_hex("ab"), ()),
|
||||
(ChangeId::from_hex("acd0"), ()),
|
||||
(ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
|
||||
]);
|
||||
];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("acd0")),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("ac")),
|
||||
3
|
||||
);
|
||||
|
||||
let id_index = build_id_index(&[
|
||||
let source = vec![
|
||||
(ChangeId::from_hex("ab"), ()),
|
||||
(ChangeId::from_hex("acd0"), ()),
|
||||
(ChangeId::from_hex("acf0"), ()),
|
||||
(ChangeId::from_hex("a0"), ()),
|
||||
(ChangeId::from_hex("ba"), ()),
|
||||
]);
|
||||
];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("a0")),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("ba")),
|
||||
1
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("ab")),
|
||||
2
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("acd0")),
|
||||
3
|
||||
);
|
||||
// If it were there, the length would be 1.
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("c0")),
|
||||
1
|
||||
);
|
||||
|
||||
let source = vec![
|
||||
(ChangeId::from_hex("000000"), ()),
|
||||
(ChangeId::from_hex("01ffff"), ()),
|
||||
(ChangeId::from_hex("010000"), ()),
|
||||
(ChangeId::from_hex("01fffe"), ()),
|
||||
(ChangeId::from_hex("ffffff"), ()),
|
||||
];
|
||||
let id_index = build_id_index::<_, 1>(&source);
|
||||
// Multiple candidates in the current chunk "01"
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("01ffff")),
|
||||
6
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("010000")),
|
||||
3
|
||||
);
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("01fffe")),
|
||||
6
|
||||
);
|
||||
// Only right neighbor
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("000000")),
|
||||
2
|
||||
);
|
||||
// Only left neighbor
|
||||
assert_eq!(
|
||||
id_index.shortest_unique_prefix_len(&*source, &ChangeId::from_hex("ffffff")),
|
||||
1
|
||||
);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue