repo: turn IdIndex into sorted Vec, use binary search

Since IdIndex is immutable, we don't need the fast insertion provided by BTreeMap. Let's simply use Vec for some speedup. More importantly, this allows us to store multiple (ChangeId, CommitId) pairs for the same change id, and will unblock the use of IdIndex in revset::resolve_symbol().

Some benchmark numbers (against my "linux" repo) follow.

Command:

    hyperfine --warmup 3 "jj log -r master \
        -T 'commit_id.short_prefix_and_brackets()' \
        --no-commit-working-copy --no-graph"

Original:

    Time (mean ± σ):      1.892 s ±  0.031 s    [User: 1.800 s, System: 0.092 s]
    Range (min … max):    1.833 s …  1.935 s    10 runs

This commit:

    Time (mean ± σ):     867.5 ms ±   2.7 ms    [User: 809.9 ms, System: 57.7 ms]
    Range (min … max):   862.3 ms … 871.0 ms    10 runs
2023-01-21 18:32:58 +09:00 · 2023-01-21 18:32:58 +09:00 · c82a62cf99
commit c82a62cf99
parent 1a6e71170e
1 changed files with 35 additions and 30 deletions
--- a/lib/src/repo.rs
+++ b/lib/src/repo.rs
@ -12,10 +12,9 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-use std::collections::{BTreeMap, HashMap, HashSet};
+use std::collections::{HashMap, HashSet};
 use std::fmt::{Debug, Formatter};
 use std::io::ErrorKind;
 use std::ops::Bound::{Excluded, Unbounded};
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 use std::{cmp, fs, io};
@ -248,11 +247,12 @@ impl ReadonlyRepo {
            let all_visible_revisions = crate::revset::RevsetExpression::all()
                .evaluate(self.as_repo_ref(), None)
                .unwrap();
-            let mut id_index = IdIndex::new();
+            IdIndex::from_vec(
-            for entry in all_visible_revisions.iter() {
+                all_visible_revisions
-                id_index.insert(entry.change_id().as_bytes(), ());
+                    .iter()
-            }
+                    .map(|entry| (entry.change_id().to_bytes(), ()))
-            id_index
+                    .collect(),
            )
        })
    }
@ -1186,17 +1186,14 @@ mod dirty_cell {
 // This value would be used to find divergent changes, for example, or if it is
 // necessary to mark whether an id is a Change or a Commit id.
 type IdIndexValue = ();
-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone)]
-pub struct IdIndex(BTreeMap<Vec<u8>, IdIndexValue>);
+pub struct IdIndex(Vec<(Vec<u8>, IdIndexValue)>);
 impl IdIndex {
-    pub fn new() -> Self {
+    /// Creates new index from the given keys. Keys may have duplicates.
-        Self::default()
+    pub fn from_vec(mut vec: Vec<(Vec<u8>, IdIndexValue)>) -> Self {
-    }
+        vec.sort_unstable_by(|(k0, _), (k1, _)| k0.cmp(k1));
-
+        IdIndex(vec)
    /// Inserts a bytes key to the index.
    pub fn insert(&mut self, key: &[u8], value: IdIndexValue) -> Option<IdIndexValue> {
        self.0.insert(key.to_vec(), value)
    }
    /// This function returns the shortest length of a prefix of `key` that
@ -1215,11 +1212,9 @@ impl IdIndex {
    ///   additional fact that it's the entire key). This case is extremely
    ///   unlikely for hashes with 12+ hexadecimal characters.
    pub fn shortest_unique_prefix_len(&self, key: &[u8]) -> usize {
-        let left = self
+        let pos = self.0.partition_point(|(k, _)| k.as_slice() < key);
-            .0
+        let left = pos.checked_sub(1).map(|p| &self.0[p]);
-            .range::<[u8], _>((Unbounded, Excluded(key)))
+        let right = self.0[pos..].iter().find(|(k, _)| k.as_slice() != key);
            .next_back();
        let right = self.0.range::<[u8], _>((Excluded(key), Unbounded)).next();
        itertools::chain(left, right)
            .map(|(neighbor, _value)| backend::common_hex_len(key, neighbor) + 1)
            .max()
@ -1233,9 +1228,18 @@ mod tests {
    #[test]
    fn test_id_index() {
-        let mut id_index = IdIndex::new();
+        // No crash if empty
-        id_index.insert(&hex::decode("ab").unwrap(), ());
+        let id_index = IdIndex::from_vec(vec![]);
-        id_index.insert(&hex::decode("acd0").unwrap(), ());
+        assert_eq!(
            id_index.shortest_unique_prefix_len(&hex::decode("00").unwrap()),
            0
        );
        let id_index = IdIndex::from_vec(vec![
            (hex::decode("ab").unwrap(), ()),
            (hex::decode("acd0").unwrap(), ()),
            (hex::decode("acd0").unwrap(), ()), // duplicated key is allowed
        ]);
        assert_eq!(
            id_index.shortest_unique_prefix_len(&hex::decode("acd0").unwrap()),
            2
@ -1245,12 +1249,13 @@ mod tests {
            3
        );
-        let mut id_index = IdIndex::new();
+        let id_index = IdIndex::from_vec(vec![
-        id_index.insert(&hex::decode("ab").unwrap(), ());
+            (hex::decode("ab").unwrap(), ()),
-        id_index.insert(&hex::decode("acd0").unwrap(), ());
+            (hex::decode("acd0").unwrap(), ()),
-        id_index.insert(&hex::decode("acf0").unwrap(), ());
+            (hex::decode("acf0").unwrap(), ()),
-        id_index.insert(&hex::decode("a0").unwrap(), ());
+            (hex::decode("a0").unwrap(), ()),
-        id_index.insert(&hex::decode("ba").unwrap(), ());
+            (hex::decode("ba").unwrap(), ()),
        ]);
        assert_eq!(
            id_index.shortest_unique_prefix_len(&hex::decode("a0").unwrap()),