repo: generalize IdIndex over key and value types

Though we'll only need IdIndex<ChangeId, IndexPosition>, this allows us to write unit tests without setting up MutableIndex.
2025-02-02 09:28:40 +00:00 · 2023-01-24 17:13:52 +09:00 · 2023-01-24 17:13:52 +09:00 · 38a9180bb7
commit 38a9180bb7
parent 8c5a14d28a
1 changed files with 40 additions and 39 deletions
--- a/lib/src/repo.rs
+++ b/lib/src/repo.rs
@ -108,7 +108,7 @@ pub struct ReadonlyRepo {
    index_store: Arc<IndexStore>,
    index: OnceCell<Arc<ReadonlyIndex>>,
    // TODO: This should eventually become part of the index and not be stored fully in memory.
-    change_id_index: OnceCell<IdIndex>,
+    change_id_index: OnceCell<ChangeIdIndex>,
    view: View,
 }
@ -249,14 +249,11 @@ impl ReadonlyRepo {
        })
    }
-    fn change_id_index(&self) -> &IdIndex {
+    fn change_id_index(&self) -> &ChangeIdIndex {
        self.change_id_index.get_or_init(|| {
            let heads = self.view().heads().iter().cloned().collect_vec();
            let walk = self.index().walk_revs(&heads, &[]);
-            IdIndex::from_vec(
+            IdIndex::from_vec(walk.map(|entry| (entry.change_id(), ())).collect())
                walk.map(|entry| (entry.change_id().to_bytes(), ()))
                    .collect(),
            )
        })
    }
@ -272,7 +269,7 @@ impl ReadonlyRepo {
                self.index()
                    .shortest_unique_commit_id_prefix_len(root_commit_id),
                self.change_id_index()
-                    .shortest_unique_prefix_len(root_change_id.as_bytes()),
+                    .shortest_unique_prefix_len(root_change_id),
            )
        } else {
            // For `len = index.shortest(id)`, a prefix of length `len` will disambiguate
@ -283,7 +280,7 @@ impl ReadonlyRepo {
                self.index()
                    .shortest_unique_commit_id_prefix_len(&CommitId::from_bytes(target_id_bytes)),
                self.change_id_index()
-                    .shortest_unique_prefix_len(target_id_bytes),
+                    .shortest_unique_prefix_len(&ChangeId::from_bytes(target_id_bytes)),
            )
        }
    }
@ -1207,15 +1204,18 @@ mod dirty_cell {
    }
 }
-// This value would be used to find divergent changes, for example, or if it is
+type ChangeIdIndex = IdIndex<ChangeId, ()>;
 // necessary to mark whether an id is a Change or a Commit id.
 type IdIndexValue = ();
 #[derive(Debug, Clone)]
 pub struct IdIndex(Vec<(Vec<u8>, IdIndexValue)>);
-impl IdIndex {
+#[derive(Debug, Clone)]
-    /// Creates new index from the given keys. Keys may have duplicates.
+pub struct IdIndex<K, V>(Vec<(K, V)>);
-    pub fn from_vec(mut vec: Vec<(Vec<u8>, IdIndexValue)>) -> Self {
+
 impl<K, V> IdIndex<K, V>
 where
    K: ObjectId + Ord,
 {
    /// Creates new index from the given entries. Multiple values can be
    /// associated with a single key.
    pub fn from_vec(mut vec: Vec<(K, V)>) -> Self {
        vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
        IdIndex(vec)
    }
@ -1223,8 +1223,7 @@ impl IdIndex {
    /// This function returns the shortest length of a prefix of `key` that
    /// disambiguates it from every other key in the index.
    ///
-    /// The given `key` must be provided as bytes, not as ASCII hexadecimal
+    /// The length to be returned is a number of hexadecimal digits.
    /// digits. The length to be returned is a number of hexadecimal digits.
    ///
    /// This has some properties that we do not currently make much use of:
    ///
@ -1235,12 +1234,14 @@ impl IdIndex {
    ///   order to disambiguate, you need every letter of the key *and* the
    ///   additional fact that it's the entire key). This case is extremely
    ///   unlikely for hashes with 12+ hexadecimal characters.
-    pub fn shortest_unique_prefix_len(&self, key: &[u8]) -> usize {
+    pub fn shortest_unique_prefix_len(&self, key: &K) -> usize {
-        let pos = self.0.partition_point(|(k, _)| k.as_slice() < key);
+        let pos = self.0.partition_point(|(k, _)| k < key);
        let left = pos.checked_sub(1).map(|p| &self.0[p]);
-        let right = self.0[pos..].iter().find(|(k, _)| k.as_slice() != key);
+        let right = self.0[pos..].iter().find(|(k, _)| k != key);
        itertools::chain(left, right)
-            .map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1)
+            .map(|(neighbor, _value)| {
                backend::common_hex_len(key.as_bytes(), neighbor.as_bytes()) + 1
            })
            .max()
            .unwrap_or(0)
    }
@ -1253,53 +1254,53 @@ mod tests {
    #[test]
    fn test_id_index() {
        // No crash if empty
-        let id_index = IdIndex::from_vec(vec![]);
+        let id_index = IdIndex::from_vec(vec![] as Vec<(ChangeId, ())>);
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("00").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("00")),
            0
        );
        let id_index = IdIndex::from_vec(vec![
-            (hex::decode("ab").unwrap(), ()),
+            (ChangeId::from_hex("ab"), ()),
-            (hex::decode("acd0").unwrap(), ()),
+            (ChangeId::from_hex("acd0"), ()),
-            (hex::decode("acd0").unwrap(), ()), // duplicated key is allowed
+            (ChangeId::from_hex("acd0"), ()), // duplicated key is allowed
        ]);
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
            2
        );
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("ac").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ac")),
            3
        );
        let id_index = IdIndex::from_vec(vec![
-            (hex::decode("ab").unwrap(), ()),
+            (ChangeId::from_hex("ab"), ()),
-            (hex::decode("acd0").unwrap(), ()),
+            (ChangeId::from_hex("acd0"), ()),
-            (hex::decode("acf0").unwrap(), ()),
+            (ChangeId::from_hex("acf0"), ()),
-            (hex::decode("a0").unwrap(), ()),
+            (ChangeId::from_hex("a0"), ()),
-            (hex::decode("ba").unwrap(), ()),
+            (ChangeId::from_hex("ba"), ()),
        ]);
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("a0")),
            2
        );
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("ba").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ba")),
            1
        );
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("ab").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("ab")),
            2
        );
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("acd0")),
            3
        );
        // If it were there, the length would be 1.
        assert_eq!(
-            id_index.shortest_unique_prefix_len(&hex::decode("c0").unwrap()),
+            id_index.shortest_unique_prefix_len(&ChangeId::from_hex("c0")),
            1
        );
    }