evolution: add support for updating state incrementally

We currently recalculate the entire evolution state whenever a new
commit is added within a transaction. That's clearly wasteful. This
commit makes the state-update incremental.
This commit is contained in:
Martin von Zweigbergk 2020-12-21 13:27:16 -08:00
parent ea7a6b9ce1
commit 4619942a57
4 changed files with 485 additions and 6 deletions

View file

@ -90,6 +90,10 @@ impl Commit {
parents
}
pub fn predecessor_ids(&self) -> Vec<CommitId> {
self.data.predecessors.clone()
}
pub fn predecessors(&self) -> Vec<Commit> {
let mut predecessors = Vec::new();
for predecessor in &self.data.predecessors {

View file

@ -28,8 +28,11 @@ use crate::transaction::{MutableRepo, Transaction};
use crate::trees::merge_trees;
use crate::view::View;
// TODO: Combine some maps/sets and use a struct as value instead.
// TODO: Move some of this into the index?
#[derive(Debug, Clone, Default)]
struct State {
children: HashMap<CommitId, HashSet<CommitId>>,
/// Contains all successors whether they have the same change id or not.
successors: HashMap<CommitId, HashSet<CommitId>>,
/// Contains the subset of the keys in `successors` for which there is a
@ -50,17 +53,16 @@ impl State {
heads.push(store.get_commit(commit_id).unwrap());
}
let mut commits = HashSet::new();
let mut children = HashMap::new();
let mut change_to_commits = HashMap::new();
for commit in walk_ancestors(heads) {
children.insert(commit.id().clone(), HashSet::new());
state.children.insert(commit.id().clone(), HashSet::new());
change_to_commits
.entry(commit.change_id().clone())
.or_insert_with(HashSet::new)
.insert(commit.id().clone());
commits.insert(commit);
}
// Scan all commits to find obsolete commits and to build a lookup of for
// Scan all commits to find obsolete commits and to build a lookup for
// children of a commit
for commit in &commits {
if commit.is_pruned() {
@ -80,7 +82,7 @@ impl State {
}
}
for parent in commit.parents() {
if let Some(children) = children.get_mut(parent.id()) {
if let Some(children) = state.children.get_mut(parent.id()) {
children.insert(commit.id().clone());
}
}
@ -100,7 +102,7 @@ impl State {
work.extend(state.pruned_commits.iter().cloned());
while !work.is_empty() {
let commit_id = work.pop().unwrap();
for child in children.get(&commit_id).unwrap() {
for child in state.children.get(&commit_id).unwrap() {
if state.orphan_commits.insert(child.clone()) {
work.push(child.clone());
}
@ -140,6 +142,94 @@ impl State {
.map_or(false, |non_obsoletes| non_obsoletes.len() > 1)
}
fn add_commit(&mut self, commit: &Commit) {
self.add_commit_data(
commit.id(),
commit.change_id(),
&commit.parent_ids(),
&commit.predecessor_ids(),
commit.is_pruned(),
);
}
fn add_commit_data(
&mut self,
commit_id: &CommitId,
change_id: &ChangeId,
parents: &[CommitId],
predecessors: &[CommitId],
is_pruned: bool,
) {
// TODO: Error out (or ignore?) if the root id is a predecessor or divergent
// (adding the root once should be fine). Perhaps this is not the right
// place to do that (we don't know what the root id is here).
for parent in parents {
self.children
.entry(parent.clone())
.or_default()
.insert(commit_id.clone());
}
if is_pruned {
self.pruned_commits.insert(commit_id.clone());
}
// Update the non_obsoletes_by_changeid by adding the new commit and removing
// the predecessors.
self.non_obsoletes_by_changeid
.entry(change_id.clone())
.or_default()
.insert(commit_id.clone());
for predecessor in predecessors {
self.successors
.entry(predecessor.clone())
.or_default()
.insert(commit_id.clone());
let became_obsolete = self
.non_obsoletes_by_changeid
.get_mut(change_id)
.unwrap()
.remove(predecessor);
// Mark descendants as orphans if the predecessor just became obsolete.
if became_obsolete {
assert!(self.obsolete_commits.insert(predecessor.clone()));
let mut descendants = HashSet::new();
for descendant in bfs(
vec![predecessor.clone()],
Box::new(|commit_id| commit_id.clone()),
Box::new(|commit_id| {
self.children
.get(commit_id)
.cloned()
.unwrap_or_else(HashSet::new)
}),
) {
descendants.insert(descendant);
}
descendants.remove(predecessor);
descendants = descendants
.iter()
.filter(|commit_id| {
!(self.obsolete_commits.contains(commit_id)
|| self.pruned_commits.contains(commit_id))
})
.cloned()
.collect();
self.orphan_commits.extend(descendants);
}
}
// Mark the new commit an orphan if any of its parents are obsolete, pruned, or
// orphans. Note that this has to be done late, in case a parent just got marked
// as obsolete or orphan above.
let is_orphan = parents.iter().any(|parent| {
self.obsolete_commits.contains(parent)
|| self.pruned_commits.contains(parent)
|| self.orphan_commits.contains(commit_id)
});
if is_orphan {
self.orphan_commits.insert(commit_id.clone());
}
}
pub fn new_parent(&self, store: &StoreWrapper, old_parent_id: &CommitId) -> HashSet<CommitId> {
let mut new_parents = HashSet::new();
if let Some(successor_ids) = self.successors.get(old_parent_id) {
@ -343,6 +433,10 @@ impl Evolution for MutableEvolution<'_, '_> {
}
impl MutableEvolution<'_, '_> {
pub fn add_commit(&mut self, commit: &Commit) {
self.state.add_commit(commit);
}
pub fn invalidate(&mut self) {
self.state = State::calculate(self.repo.store(), self.repo.view());
}
@ -515,3 +609,375 @@ fn evolve_two_divergent_commits(
.set_predecessors(vec![commit1.id().clone(), commit2.id().clone()])
.write_to_transaction(tx)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn add_commit_data_initial() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
assert!(!state.is_obsolete(&initial_commit));
assert!(!state.is_orphan(&initial_commit));
assert!(!state.is_divergent(&initial_change));
}
#[test]
fn add_commit_data_pruned() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], true);
assert!(!state.is_obsolete(&initial_commit));
assert!(!state.is_orphan(&initial_commit));
assert!(!state.is_divergent(&initial_change));
}
#[test]
fn add_commit_data_creating_orphan() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let orphan_commit1 = CommitId::from_hex("bbb111");
let orphan_change1 = ChangeId::from_hex("bbb111");
let orphan_commit2 = CommitId::from_hex("ccc111");
let orphan_change2 = ChangeId::from_hex("ccc111");
let obsolete_orphan_commit = CommitId::from_hex("ddd111");
let obsolete_orphan_change = ChangeId::from_hex("ddd111");
let pruned_orphan_commit = CommitId::from_hex("eee111");
let rewritten_commit = CommitId::from_hex("aaa222");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&orphan_commit1,
&orphan_change1,
&[initial_commit.clone()],
&[],
false,
);
state.add_commit_data(
&orphan_commit2,
&orphan_change2,
&[orphan_commit1.clone()],
&[],
false,
);
state.add_commit_data(
&obsolete_orphan_commit,
&obsolete_orphan_change,
&[initial_commit.clone()],
&[],
false,
);
state.add_commit_data(
&pruned_orphan_commit,
&obsolete_orphan_change,
&[initial_commit.clone()],
&[obsolete_orphan_commit.clone()],
true,
);
state.add_commit_data(
&rewritten_commit,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
assert!(state.is_orphan(&orphan_commit1));
assert!(state.is_orphan(&orphan_commit2));
assert!(!state.is_orphan(&obsolete_orphan_commit));
assert!(!state.is_orphan(&pruned_orphan_commit));
assert!(!state.is_obsolete(&orphan_commit1));
assert!(!state.is_obsolete(&orphan_commit2));
assert!(state.is_obsolete(&obsolete_orphan_commit));
assert!(!state.is_obsolete(&pruned_orphan_commit));
}
#[test]
fn add_commit_data_new_commit_on_obsolete() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit = CommitId::from_hex("aaa222");
let new_commit = CommitId::from_hex("bbb111");
let new_change = ChangeId::from_hex("bbb111");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&rewritten_commit,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&new_commit,
&new_change,
&[initial_commit.clone()],
&[],
false,
);
assert!(state.is_orphan(&new_commit));
}
#[test]
fn add_commit_data_new_commit_on_orphan() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit = CommitId::from_hex("aaa222");
let orphan_commit = CommitId::from_hex("bbb111");
let orphan_change = ChangeId::from_hex("bbb111");
let new_commit = CommitId::from_hex("bbb111");
let new_change = ChangeId::from_hex("bbb111");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&rewritten_commit,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&orphan_commit,
&orphan_change,
&[initial_commit.clone()],
&[],
false,
);
state.add_commit_data(
&new_commit,
&new_change,
&[orphan_commit.clone()],
&[],
false,
);
assert!(state.is_orphan(&new_commit));
}
#[test]
fn add_commit_data_new_commit_on_pruned() {
let mut state = State::default();
let pruned_commit = CommitId::from_hex("aaa111");
let pruned_change = ChangeId::from_hex("aaa111");
let new_commit = CommitId::from_hex("bbb111");
let new_change = ChangeId::from_hex("bbb111");
state.add_commit_data(&pruned_commit, &pruned_change, &[], &[], true);
state.add_commit_data(
&new_commit,
&new_change,
&[pruned_commit.clone()],
&[],
false,
);
assert!(state.is_orphan(&new_commit));
}
#[test]
fn add_commit_data_rewrite_as_child() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit = CommitId::from_hex("aaa222");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
// The new commit is both a child and a successor of the initial commit
state.add_commit_data(
&rewritten_commit,
&initial_change,
&[initial_commit.clone()],
&[initial_commit.clone()],
false,
);
assert!(state.is_obsolete(&initial_commit));
assert!(!state.is_obsolete(&rewritten_commit));
assert!(!state.is_orphan(&initial_commit));
assert!(state.is_orphan(&rewritten_commit));
assert!(!state.is_divergent(&initial_change));
}
#[test]
fn add_commit_data_duplicates() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let duplicate_commit1 = CommitId::from_hex("bbb111");
let duplicate_change1 = ChangeId::from_hex("bbb111");
let duplicate_commit2 = CommitId::from_hex("ccc111");
let duplicate_change2 = ChangeId::from_hex("ccc111");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&duplicate_commit1,
&duplicate_change1,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&duplicate_commit2,
&duplicate_change2,
&[],
&[initial_commit.clone()],
false,
);
assert!(!state.is_obsolete(&initial_commit));
assert!(!state.is_obsolete(&duplicate_commit1));
assert!(!state.is_obsolete(&duplicate_commit2));
assert!(!state.is_divergent(&initial_change));
assert!(!state.is_divergent(&duplicate_change1));
assert!(!state.is_divergent(&duplicate_change2));
assert_eq!(
state.successors(&initial_commit),
hashset!(duplicate_commit1.clone(), duplicate_commit2.clone())
);
}
#[test]
fn add_commit_data_divergent() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit1 = CommitId::from_hex("aaa222");
let rewritten_commit2 = CommitId::from_hex("aaa333");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&rewritten_commit1,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&rewritten_commit2,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
assert!(state.is_obsolete(&initial_commit));
assert!(!state.is_obsolete(&rewritten_commit1));
assert!(!state.is_obsolete(&rewritten_commit2));
assert!(state.is_divergent(&initial_change));
assert_eq!(
state.successors(&initial_commit),
hashset!(rewritten_commit1.clone(), rewritten_commit2.clone())
);
}
#[test]
fn add_commit_data_divergent_pruned() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_pruned = CommitId::from_hex("aaa222");
let rewritten_non_pruned = CommitId::from_hex("aaa333");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&rewritten_pruned,
&initial_change,
&[],
&[initial_commit.clone()],
true,
);
state.add_commit_data(
&rewritten_non_pruned,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
assert!(state.is_obsolete(&initial_commit));
assert!(!state.is_obsolete(&rewritten_pruned));
assert!(!state.is_obsolete(&rewritten_non_pruned));
// It's still divergent even if one side is pruned
assert!(state.is_divergent(&initial_change));
assert_eq!(
state.successors(&initial_commit),
hashset!(rewritten_pruned.clone(), rewritten_non_pruned.clone())
);
}
#[test]
fn add_commit_data_divergent_unrelated() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit = CommitId::from_hex("aaa222");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
// Same change id as the initial commit but no predecessor relationship to it
state.add_commit_data(&rewritten_commit, &initial_change, &[], &[], false);
assert!(!state.is_obsolete(&initial_commit));
assert!(!state.is_obsolete(&rewritten_commit));
assert!(state.is_divergent(&initial_change));
assert_eq!(state.successors(&initial_commit), hashset!());
}
#[test]
fn add_commit_data_divergent_convergent() {
let mut state = State::default();
let initial_commit = CommitId::from_hex("aaa111");
let initial_change = ChangeId::from_hex("aaa111");
let rewritten_commit1 = CommitId::from_hex("aaa222");
let rewritten_commit2 = CommitId::from_hex("aaa333");
let convergent_commit = CommitId::from_hex("aaa444");
state.add_commit_data(&initial_commit, &initial_change, &[], &[], false);
state.add_commit_data(
&rewritten_commit1,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&rewritten_commit2,
&initial_change,
&[],
&[initial_commit.clone()],
false,
);
state.add_commit_data(
&convergent_commit,
&initial_change,
&[],
&[rewritten_commit1.clone(), rewritten_commit2.clone()],
false,
);
assert!(state.is_obsolete(&initial_commit));
assert!(state.is_obsolete(&rewritten_commit1));
assert!(state.is_obsolete(&rewritten_commit2));
assert!(!state.is_obsolete(&convergent_commit));
assert!(!state.is_divergent(&initial_change));
assert_eq!(
state.successors(&rewritten_commit1),
hashset!(convergent_commit.clone())
);
assert_eq!(
state.successors(&rewritten_commit2),
hashset!(convergent_commit.clone())
);
}
}

View file

@ -52,6 +52,10 @@ impl Debug for ChangeId {
}
impl ChangeId {
pub fn from_hex(hex: &str) -> Self {
ChangeId(hex::decode(hex).unwrap())
}
pub fn hex(&self) -> String {
hex::encode(&self.0)
}

View file

@ -166,8 +166,13 @@ impl<'r> Transaction<'r> {
pub fn add_head(&mut self, head: &Commit) {
let mut_repo = Arc::get_mut(self.repo.as_mut().unwrap()).unwrap();
// TODO: Also add other ancestors that were not already known. For example, when
// the user creates a new repo backed by a git repo and they then check
// out some commit, we'll need add all ancestors of that commit. This
// code probably needs to be restructured a bit. Perhaps it should be
// higher-level code that makes sure unknown ancestors are also added.
mut_repo.view.as_mut().unwrap().add_head(head);
mut_repo.evolution.as_mut().unwrap().invalidate();
mut_repo.evolution.as_mut().unwrap().add_commit(head);
}
pub fn remove_head(&mut self, head: &Commit) {