index: import commits in chronological order

This basically means that heads in a filtered graph appear in reverse
chronological order. Before, "jj log -r 'tags()'" in linux-stable repo would
look randomly sorted once you ran "jj debug reindex" in it.

With this change, indexing is more like breadth-first search, and BFS is
known to be bad at rendering nice graph (because branches run in parallel.)
However, we have a post process to group topological branches, so we don't
have this problem. For serialization formats like Mercurial's revlog iirc,
BFS leads to bad compression ratio, but our index isn't that kind of data.

Reindexing gets slightly slower, but I think this is negligible.

  (in Git repository)
  % hyperfine --warmup 3 --runs 10 "jj debug reindex --ignore-working-copy"
  (original)
  Time (mean ± σ):      1.521 s ±  0.027 s    [User: 1.307 s, System: 0.211 s]
  Range (min … max):    1.486 s …  1.573 s    10 runs
  (new)
  Time (mean ± σ):      1.568 s ±  0.027 s    [User: 1.368 s, System: 0.197 s]
  Range (min … max):    1.531 s …  1.625 s    10 runs

Another idea is to sort heads chronologically and run DFS-based topological
sorting. It's ad-hoc, but worked surprisingly well for my local repositories.
For repositories with lots of long-running branches, this commit will provide
more predictable result than DFS-based one.
This commit is contained in:
Yuya Nishihara 2023-08-12 19:32:05 +09:00
parent cc6e9150d5
commit 6286cde543
5 changed files with 38 additions and 17 deletions

View file

@ -481,10 +481,10 @@ fn test_branch_list_filtered_by_revset() {
3e9a5af6ef15 remote-rewrite@origin (hidden)
911e912015fb remote-keep
dad5f298ca57 remote-delete@origin
911e912015fb remote-keep
000000000000
"###);

View file

@ -158,3 +158,23 @@ impl Commit {
false
}
}
/// Wrapper to sort `Commit` by committer timestamp.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub(crate) struct CommitByCommitterTimestamp(pub Commit);
impl Ord for CommitByCommitterTimestamp {
fn cmp(&self, other: &Self) -> Ordering {
let self_timestamp = &self.0.committer().timestamp.timestamp;
let other_timestamp = &other.0.committer().timestamp.timestamp;
self_timestamp
.cmp(other_timestamp)
.then_with(|| self.0.cmp(&other.0)) // to comply with Eq
}
}
impl PartialOrd for CommitByCommitterTimestamp {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

View file

@ -35,7 +35,7 @@ use tempfile::NamedTempFile;
use thiserror::Error;
use crate::backend::{ChangeId, CommitId, ObjectId};
use crate::commit::Commit;
use crate::commit::{Commit, CommitByCommitterTimestamp};
use crate::file_util::persist_content_addressed_temp_file;
use crate::index::{
HexPrefix, Index, IndexStore, IndexWriteError, MutableIndex, PrefixResolution, ReadonlyIndex,
@ -147,28 +147,29 @@ impl DefaultIndexStore {
new_heads_count = new_heads.len(),
"indexing commits reachable from historical heads"
);
// Build a list of ancestors of heads where parents and predecessors come before
// Build a list of ancestors of heads where parents and predecessors come after
// the commit itself.
let parent_file_has_id = |id: &CommitId| {
maybe_parent_file
.as_ref()
.map_or(false, |index| index.has_id(id))
};
let commits = dag_walk::topo_order_forward(
let commits = dag_walk::topo_order_reverse_ord(
new_heads
.iter()
.filter(|&id| !parent_file_has_id(id))
.map(|id| store.get_commit(id).unwrap())
.sorted(),
|commit| commit.id().clone(),
|commit| {
.map(CommitByCommitterTimestamp),
|CommitByCommitterTimestamp(commit)| commit.id().clone(),
|CommitByCommitterTimestamp(commit)| {
itertools::chain(commit.parent_ids(), commit.predecessor_ids())
.filter(|&id| !parent_file_has_id(id))
.map(|id| store.get_commit(id).unwrap())
.map(CommitByCommitterTimestamp)
.collect_vec()
},
);
for commit in &commits {
for CommitByCommitterTimestamp(commit) in commits.iter().rev() {
data.add_commit(commit);
}

View file

@ -242,7 +242,6 @@ pub fn import_some_refs(
for commit in &head_commits {
prevent_gc(git_repo, commit.id())?;
}
head_commits.reverse(); // TODO: sort chronologically by add_heads()
mut_repo.add_heads(&head_commits);
// Apply the change that happened in git since last time we imported refs.

View file

@ -33,7 +33,7 @@ use crate::backend::{
Backend, BackendError, BackendInitError, BackendLoadError, BackendResult, ChangeId, CommitId,
ObjectId, TreeId,
};
use crate::commit::Commit;
use crate::commit::{Commit, CommitByCommitterTimestamp};
use crate::commit_builder::CommitBuilder;
use crate::default_index_store::DefaultIndexStore;
use crate::default_submodule_store::DefaultSubmoduleStore;
@ -938,19 +938,20 @@ impl MutableRepo {
}
}
_ => {
let missing_commits = dag_walk::topo_order_forward(
heads.iter().cloned(),
|commit: &Commit| commit.id().clone(),
|commit: &Commit| -> Vec<Commit> {
let missing_commits = dag_walk::topo_order_reverse_ord(
heads.iter().cloned().map(CommitByCommitterTimestamp),
|CommitByCommitterTimestamp(commit)| commit.id().clone(),
|CommitByCommitterTimestamp(commit)| {
commit
.parent_ids()
.iter()
.filter(|id| !self.index().has_id(id))
.map(|id| self.store().get_commit(id).unwrap())
.collect()
.map(CommitByCommitterTimestamp)
.collect_vec()
},
);
for missing_commit in &missing_commits {
for CommitByCommitterTimestamp(missing_commit) in missing_commits.iter().rev() {
self.index.add_commit(missing_commit);
}
for head in heads {