jj/lib/src/id_prefix.rs
Yuya Nishihara 5b568cabcc revset: add iterator of (CommitId, ChangeId) pairs, use it in id_index
There are a few more places where we need these pairs.
2023-05-24 01:02:37 +09:00

368 lines
13 KiB
Rust

// Copyright 2023 The Jujutsu Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::rc::Rc;
use once_cell::unsync::OnceCell;
use crate::backend::{self, ChangeId, CommitId, ObjectId};
use crate::index::{HexPrefix, PrefixResolution};
use crate::op_store::WorkspaceId;
use crate::repo::Repo;
use crate::revset::{DefaultSymbolResolver, RevsetExpression};
/// Marker error produced when the disambiguation revset cannot be resolved or
/// evaluated. It deliberately carries no detail about the underlying failure.
struct PrefixDisambiguationError;
/// Inputs for revset-scoped prefix disambiguation, together with the indexes
/// that are derived from them on first use.
struct DisambiguationData {
// Revset expression whose commits form the disambiguation domain.
expression: Rc<RevsetExpression>,
// Optional workspace passed to the symbol resolver when the expression is
// resolved.
workspace_id: Option<WorkspaceId>,
// Cache for the indexes built from `expression`; empty until first needed.
indexes: OnceCell<Indexes>,
}
/// Prefix lookup tables built from the commits of the disambiguation revset.
struct Indexes {
// Commit ids in the revset; the keys alone carry the information, so the
// value type is `()`.
commit_index: IdIndex<CommitId, ()>,
// Change ids in the revset, each entry paired with the id of a commit that
// has that change id. A change id may appear in more than one entry.
change_index: IdIndex<ChangeId, CommitId>,
}
impl DisambiguationData {
    /// Returns the prefix indexes for the disambiguation revset.
    ///
    /// The indexes are built the first time this is called and the cached
    /// value is handed out on every later call. An error is returned if the
    /// revset expression cannot be resolved or evaluated against `repo`.
    fn indexes(&self, repo: &dyn Repo) -> Result<&Indexes, PrefixDisambiguationError> {
        self.indexes.get_or_try_init(|| self.build_indexes(repo))
    }

    /// Resolves and evaluates the revset expression against `repo`, then
    /// collects the resulting (commit id, change id) pairs into both indexes.
    fn build_indexes(&self, repo: &dyn Repo) -> Result<Indexes, PrefixDisambiguationError> {
        let resolver = DefaultSymbolResolver::new(repo, self.workspace_id.as_ref());
        let resolved = self
            .expression
            .clone()
            .resolve_user_expression(repo, &resolver)
            .map_err(|_| PrefixDisambiguationError)?;
        let revset = resolved
            .evaluate(repo)
            .map_err(|_| PrefixDisambiguationError)?;

        // Each pair feeds both tables: the commit id becomes a key of the
        // commit index and the value stored under its change id.
        let (commit_entries, change_entries): (Vec<_>, Vec<_>) = revset
            .commit_change_ids()
            .map(|(commit_id, change_id)| ((commit_id.clone(), ()), (change_id, commit_id)))
            .unzip();

        Ok(Indexes {
            commit_index: IdIndex::from_vec(commit_entries),
            change_index: IdIndex::from_vec(change_entries),
        })
    }
}
/// Context used to resolve and shorten commit/change id prefixes, optionally
/// preferring the ids found in a disambiguation revset.
///
/// The default value has no disambiguation revset configured.
#[derive(Default)]
pub struct IdPrefixContext {
// `None` until `disambiguate_within()` has been called.
disambiguation: Option<DisambiguationData>,
}
impl IdPrefixContext {
    /// Returns this context configured to disambiguate prefixes within the
    /// commits of `expression` first.
    ///
    /// The expression is only stored here; the indexes derived from it are
    /// built lazily when a lookup first needs them. Any previously configured
    /// disambiguation revset is replaced.
    pub fn disambiguate_within(
        mut self,
        expression: Rc<RevsetExpression>,
        workspace_id: Option<WorkspaceId>,
    ) -> Self {
        let data = DisambiguationData {
            expression,
            workspace_id,
            indexes: OnceCell::new(),
        };
        self.disambiguation = Some(data);
        self
    }

    /// Returns the disambiguation indexes, or `None` if no revset was
    /// configured or the indexes could not be built.
    fn disambiguation_indexes(&self, repo: &dyn Repo) -> Option<&Indexes> {
        // TODO: propagate errors instead of treating them as if no revset was specified
        let data = self.disambiguation.as_ref()?;
        data.indexes(repo).ok()
    }

    /// Resolve an unambiguous commit ID prefix.
    ///
    /// A unique match inside the disambiguation revset wins; any other
    /// outcome there falls back to the repo-wide index.
    pub fn resolve_commit_prefix(
        &self,
        repo: &dyn Repo,
        prefix: &HexPrefix,
    ) -> PrefixResolution<CommitId> {
        let scoped = self
            .disambiguation_indexes(repo)
            .map(|indexes| indexes.commit_index.resolve_prefix_to_key(prefix));
        match scoped {
            Some(PrefixResolution::SingleMatch(id)) => PrefixResolution::SingleMatch(id.clone()),
            _ => repo.index().resolve_prefix(prefix),
        }
    }

    /// Returns the shortest length of a prefix of `commit_id` that
    /// can still be resolved by `resolve_commit_prefix()`.
    pub fn shortest_commit_prefix_len(&self, repo: &dyn Repo, commit_id: &CommitId) -> usize {
        match self.disambiguation_indexes(repo) {
            // TODO: Avoid the double lookup here (has_key() + shortest_unique_prefix_len())
            Some(indexes) if indexes.commit_index.has_key(commit_id) => {
                indexes.commit_index.shortest_unique_prefix_len(commit_id)
            }
            _ => repo.index().shortest_unique_commit_id_prefix_len(commit_id),
        }
    }

    /// Resolve an unambiguous change ID prefix to the commit IDs in the revset.
    ///
    /// If the revset does not yield a unique change id, the lookup falls back
    /// to the whole repo.
    pub fn resolve_change_prefix(
        &self,
        repo: &dyn Repo,
        prefix: &HexPrefix,
    ) -> PrefixResolution<Vec<CommitId>> {
        let scoped = self
            .disambiguation_indexes(repo)
            .map(|indexes| indexes.change_index.resolve_prefix_to_values(prefix));
        match scoped {
            Some(PrefixResolution::SingleMatch(ids)) => PrefixResolution::SingleMatch(ids),
            _ => repo.resolve_change_id_prefix(prefix),
        }
    }

    /// Returns the shortest length of a prefix of `change_id` that
    /// can still be resolved by `resolve_change_prefix()`.
    pub fn shortest_change_prefix_len(&self, repo: &dyn Repo, change_id: &ChangeId) -> usize {
        match self.disambiguation_indexes(repo) {
            Some(indexes) if indexes.change_index.has_key(change_id) => {
                indexes.change_index.shortest_unique_prefix_len(change_id)
            }
            _ => repo.shortest_unique_change_id_prefix_len(change_id),
        }
    }
}
/// Prefix-searchable table of `(key, value)` entries.
///
/// `from_vec()` sorts the entries by key, and the lookup methods rely on that
/// ordering. The same key may appear in several entries.
#[derive(Debug, Clone)]
pub struct IdIndex<K, V>(Vec<(K, V)>);
impl<K, V> IdIndex<K, V>
where
    K: ObjectId + Ord,
{
    /// Builds an index out of `vec`. The entries are sorted by key; a key may
    /// occur more than once, each time with its own value.
    pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
        vec.sort_unstable_by(|lhs, rhs| lhs.0.cmp(&rhs.0));
        IdIndex(vec)
    }

    /// Finds the entries matching `prefix` and, provided they all share one
    /// key, returns that key together with the mapped values collected into
    /// `B`.
    ///
    /// Returns `NoMatch` if nothing matches and `AmbiguousMatch` if the
    /// matching entries have different keys or if `prefix` is empty.
    pub fn resolve_prefix_with<'a, B, U>(
        &'a self,
        prefix: &HexPrefix,
        mut value_mapper: impl FnMut(&'a V) -> U,
    ) -> PrefixResolution<(&'a K, B)>
    where
        B: FromIterator<U>,
    {
        // We consider an empty prefix ambiguous even if the index has a single entry.
        if prefix.min_prefix_bytes().is_empty() {
            return PrefixResolution::AmbiguousMatch;
        }

        let mut entries = self.resolve_prefix_range(prefix).peekable();
        let first_key = match entries.peek() {
            Some(&(key, _)) => key,
            None => return PrefixResolution::NoMatch,
        };

        // Collecting into `Option` stops at the first entry whose key differs
        // from the first one, which is what signals ambiguity.
        let collected: Option<B> = entries
            .map(|(key, value)| (key == first_key).then(|| value_mapper(value)))
            .collect();
        match collected {
            Some(values) => PrefixResolution::SingleMatch((first_key, values)),
            None => PrefixResolution::AmbiguousMatch,
        }
    }

    /// Looks up the single key that matches `prefix`, ignoring the values.
    pub fn resolve_prefix_to_key<'a>(&'a self, prefix: &HexPrefix) -> PrefixResolution<&'a K> {
        let resolution: PrefixResolution<(&K, ())> = self.resolve_prefix_with(prefix, |_| ());
        resolution.map(|(key, _)| key)
    }

    /// Looks up the entries matching `prefix` and returns clones of their
    /// values if all of them belong to one key.
    pub fn resolve_prefix_to_values(&self, prefix: &HexPrefix) -> PrefixResolution<Vec<V>>
    where
        V: Clone,
    {
        self.resolve_prefix_with(prefix, V::clone)
            .map(|(_key, values)| values)
    }

    /// Iterates over the entries whose key matches `prefix`, in key order.
    pub fn resolve_prefix_range<'a: 'b, 'b>(
        &'a self,
        prefix: &'b HexPrefix,
    ) -> impl Iterator<Item = (&'a K, &'a V)> + 'b {
        let lower_bound = prefix.min_prefix_bytes();
        // Entries are sorted, so every candidate sits at or after the first
        // key that is not below the prefix bytes.
        let start = self
            .0
            .partition_point(|(candidate, _)| candidate.as_bytes() < lower_bound);
        self.0[start..]
            .iter()
            .take_while(move |(candidate, _)| prefix.matches(candidate))
            .map(|(candidate, value)| (candidate, value))
    }

    /// Returns whether at least one entry has exactly the given `key`.
    pub fn has_key(&self, key: &K) -> bool {
        self.0.binary_search_by(|entry| entry.0.cmp(key)).is_ok()
    }

    /// Computes the shortest prefix length of `key` that tells it apart from
    /// every other key in the index.
    ///
    /// The returned length is counted in hexadecimal digits.
    ///
    /// Some properties that are not relied upon much at the moment:
    ///
    /// - `key` does not have to be present in the index for this to work.
    ///
    /// - If the index contains keys that have `key` as an exact prefix, the
    ///   result is `key.len() + 1`. Conceptually, disambiguating then takes
    ///   every digit of the key *and* the extra fact that it is the whole
    ///   key. This is extremely unlikely for hashes with 12+ hexadecimal
    ///   characters.
    pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
        let split = self.0.partition_point(|(candidate, _)| candidate < key);
        let (smaller, not_smaller) = self.0.split_at(split);
        // Only the closest distinct keys on either side can share the longest
        // prefix with `key`.
        let predecessor = smaller.last();
        let successor = not_smaller.iter().find(|(candidate, _)| candidate != key);
        predecessor
            .into_iter()
            .chain(successor)
            .map(|(neighbor, _)| backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1)
            .max()
            // Even if the key is the only one in the index, we require at least one digit.
            .unwrap_or(1)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::backend::{ChangeId, ObjectId};

    /// Builds a `ChangeId` from a hex literal.
    fn change_id(hex: &'static str) -> ChangeId {
        ChangeId::from_hex(hex)
    }

    /// Builds a `HexPrefix` from a literal that is known to be valid.
    fn hex_prefix(hex: &str) -> HexPrefix {
        HexPrefix::new(hex).unwrap()
    }

    /// Shorthand for `shortest_unique_prefix_len()` on a hex literal.
    fn shortest_len(index: &IdIndex<ChangeId, ()>, hex: &'static str) -> usize {
        index.shortest_unique_prefix_len(&change_id(hex))
    }

    #[test]
    fn test_id_index_resolve_prefix() {
        let id_index = IdIndex::from_vec(vec![
            (change_id("0000"), 0),
            (change_id("0099"), 1),
            (change_id("0099"), 2),
            (change_id("0aaa"), 3),
            (change_id("0aab"), 4),
        ]);
        let resolve = |hex: &str| -> PrefixResolution<Vec<i32>> {
            id_index.resolve_prefix_to_values(&hex_prefix(hex))
        };
        // order of values might not be preserved by IdIndex
        let resolve_sorted = |hex: &str| {
            resolve(hex).map(|mut values| {
                values.sort();
                values
            })
        };

        assert_eq!(resolve("0"), PrefixResolution::AmbiguousMatch);
        assert_eq!(resolve("00"), PrefixResolution::AmbiguousMatch);
        assert_eq!(resolve("000"), PrefixResolution::SingleMatch(vec![0]));
        assert_eq!(resolve("0001"), PrefixResolution::NoMatch);
        assert_eq!(
            resolve_sorted("009"),
            PrefixResolution::SingleMatch(vec![1, 2])
        );
        assert_eq!(resolve("0aa"), PrefixResolution::AmbiguousMatch);
        assert_eq!(resolve("0aab"), PrefixResolution::SingleMatch(vec![4]));
        assert_eq!(resolve("f"), PrefixResolution::NoMatch);
    }

    #[test]
    fn test_has_key() {
        // No crash if empty
        let empty_index: IdIndex<ChangeId, ()> = IdIndex::from_vec(vec![]);
        assert!(!empty_index.has_key(&change_id("00")));

        let single_index = IdIndex::from_vec(vec![(change_id("ab"), ())]);
        assert!(!single_index.has_key(&change_id("aa")));
        assert!(single_index.has_key(&change_id("ab")));
        assert!(!single_index.has_key(&change_id("ac")));
    }

    #[test]
    fn test_id_index_shortest_unique_prefix_len() {
        // No crash if empty
        let empty_index: IdIndex<ChangeId, ()> = IdIndex::from_vec(vec![]);
        assert_eq!(shortest_len(&empty_index, "00"), 1);

        let with_duplicates = IdIndex::from_vec(vec![
            (change_id("ab"), ()),
            (change_id("acd0"), ()),
            (change_id("acd0"), ()), // duplicated key is allowed
        ]);
        assert_eq!(shortest_len(&with_duplicates, "acd0"), 2);
        assert_eq!(shortest_len(&with_duplicates, "ac"), 3);

        let mixed_index = IdIndex::from_vec(vec![
            (change_id("ab"), ()),
            (change_id("acd0"), ()),
            (change_id("acf0"), ()),
            (change_id("a0"), ()),
            (change_id("ba"), ()),
        ]);
        assert_eq!(shortest_len(&mixed_index, "a0"), 2);
        assert_eq!(shortest_len(&mixed_index, "ba"), 1);
        assert_eq!(shortest_len(&mixed_index, "ab"), 2);
        assert_eq!(shortest_len(&mixed_index, "acd0"), 3);
        // If it were there, the length would be 1.
        assert_eq!(shortest_len(&mixed_index, "c0"), 1);
    }
}