From aef27d570191f2ba74de9ea58853d16efd9fd21a Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 29 Apr 2021 21:43:12 -0700 Subject: [PATCH] revsets: remove transitive edges in graph iterator by default The git.git repo seems to have lots of merges from far back in the history into newer history. That results in `jj log -r 'git_refs()'` being completely useless because of the number of such edges. For example, v2.31.0 has almost 600 edges going out of it and presumably merging (forking) back into various different previous versions. Git, unlike Mercurial, seems to remove an edge from the graph if the edge can also be reached via a longer path. This commit makes it so we also do that (i.e. the filtered graph is a transitive reduction of the graph before filtering). This slows down `jj log -r ,,v2.0.0 -T ""` by about 2%. That's still small enough that it doesn't seem worth it to have a separate iterator for contiguous ranges (which would be an option). --- lib/src/revset_graph_iterator.rs | 110 ++++++++++++++- lib/tests/test_revset_graph_iterator.rs | 174 ++++++++++++++++++++---- 2 files changed, 254 insertions(+), 30 deletions(-) diff --git a/lib/src/revset_graph_iterator.rs b/lib/src/revset_graph_iterator.rs index b14e45d5f..11874bdf0 100644 --- a/lib/src/revset_graph_iterator.rs +++ b/lib/src/revset_graph_iterator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::min; use std::collections::{BTreeMap, HashSet}; use crate::index::{IndexEntry, IndexPosition}; @@ -83,6 +84,40 @@ pub enum RevsetGraphEdgeType { // from an external commit are missing, we consider the edge to that edge to // also be missing. In the example above, that means that "B" will have a // missing edge to "d" rather than to the root. +// +// The iterator can be configured to skip transitive edges that it would +// otherwise return. In this mode (which is the default), the edge from "A" to +// "E" in the example above would be excluded because there's also a transitive +// path from "A" to "E" via "B". The implementation of that mode +// adds a filtering step just before yielding the edges for a commit. The +// filtering works doing a DFS in the simplified graph. That may require even +// more look-ahead. Consider this example (uppercase characters are in the input +// set): +// +// J +// /| +// | i +// | |\ +// | | H +// G | | +// | e f +// | \|\ +// | D | +// \ / c +// b / +// |/ +// A +// | +// root +// +// When walking from "J", we'll find indirect edges to "H", "G", and "D". This +// is our unfiltered set of edges, before removing transitive edges. In order to +// know that "D" is an ancestor of "H", we need to also walk from "H". We use +// the same search for finding edges from "H" as we used from "J". That results +// in looking ahead all the way to "A". We could reduce the amount of look-ahead +// by stopping at "c" since we're only interested in edges that could lead to +// "D", but that would require extra book-keeping to remember for later that the +// edges from "f" and "H" are only partially computed. pub struct RevsetGraphIterator<'revset, 'repo> { input_set_iter: RevsetIterator<'revset, 'repo>, // Commits in the input set we had to take out of the iterator while walking external @@ -93,7 +128,8 @@ pub struct RevsetGraphIterator<'revset, 'repo> { min_position: IndexPosition, // Edges for commits not in the input set. // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? - external_commits: BTreeMap>, + edges: BTreeMap>, + skip_transitive_edges: bool, } impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { @@ -102,10 +138,16 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { input_set_iter: iter, look_ahead: Default::default(), min_position: IndexPosition::MAX, - external_commits: Default::default(), + edges: Default::default(), + skip_transitive_edges: true, } } + pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self { + self.skip_transitive_edges = skip_transitive_edges; + self + } + fn next_index_entry(&mut self) -> Option> { if let Some((_, index_entry)) = self.look_ahead.pop_last() { return Some(index_entry); @@ -117,6 +159,9 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { &mut self, index_entry: &IndexEntry<'repo>, ) -> HashSet { + if let Some(edges) = self.edges.get(&index_entry.position()) { + return edges.clone(); + } let mut edges = HashSet::new(); for parent in index_entry.parents() { let parent_position = parent.position(); @@ -135,6 +180,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { } } } + self.edges.insert(index_entry.position(), edges.clone()); edges } @@ -154,7 +200,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { if self.look_ahead.contains_key(&parent_position) { // We have found a path back into the input set edges.insert(RevsetGraphEdge::indirect(parent_position)); - } else if let Some(parent_edges) = self.external_commits.get(&parent_position) { + } else if let Some(parent_edges) = self.edges.get(&parent_position) { if parent_edges .iter() .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) @@ -175,10 +221,59 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { } if parents_complete { stack.pop().unwrap(); - self.external_commits.insert(position, edges); + self.edges.insert(position, edges); } } - self.external_commits.get(&position).unwrap().clone() + self.edges.get(&position).unwrap().clone() + } + + fn remove_transitive_edges( + &mut self, + edges: HashSet, + ) -> HashSet { + if !edges + .iter() + .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect) + { + return edges; + } + let mut min_generation = u32::MAX; + let mut initial_targets = HashSet::new(); + let mut work = vec![]; + // To start with, add the edges one step after the input edges. + for edge in &edges { + initial_targets.insert(edge.target); + if edge.edge_type != RevsetGraphEdgeType::Missing { + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); + min_generation = min(min_generation, entry.generation_number()); + work.extend(self.edges_from_internal_commit(&entry)); + } + } + // Find commits reachable transitively and add them to the `unwanted` set. + let mut unwanted = HashSet::new(); + while let Some(edge) = work.pop() { + if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position { + continue; + } + if !unwanted.insert(edge.target) { + // Already visited + continue; + } + if initial_targets.contains(&edge.target) { + // Already visited + continue; + } + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); + if entry.generation_number() < min_generation { + continue; + } + work.extend(self.edges_from_internal_commit(&entry)); + } + + edges + .into_iter() + .filter(|edge| !unwanted.contains(&edge.target)) + .collect() } fn consume_to(&mut self, pos: IndexPosition) { @@ -199,7 +294,10 @@ impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> { fn next(&mut self) -> Option { if let Some(index_entry) = self.next_index_entry() { - let edges = self.edges_from_internal_commit(&index_entry); + let mut edges = self.edges_from_internal_commit(&index_entry); + if self.skip_transitive_edges { + edges = self.remove_transitive_edges(edges); + } Some((index_entry, edges)) } else { None diff --git a/lib/tests/test_revset_graph_iterator.rs b/lib/tests/test_revset_graph_iterator.rs index fa0f05753..c22b52e9c 100644 --- a/lib/tests/test_revset_graph_iterator.rs +++ b/lib/tests/test_revset_graph_iterator.rs @@ -17,9 +17,11 @@ use jujube_lib::revset_graph_iterator::RevsetGraphEdge; use jujube_lib::testutils; use jujube_lib::testutils::CommitGraphBuilder; use maplit::hashset; +use test_case::test_case; -#[test] -fn test_graph_iterator_linearized() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_linearized(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -45,7 +47,11 @@ fn test_graph_iterator_linearized() { let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_d]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 2); assert_eq!(commits[0].0.commit_id(), *commit_d.id()); @@ -56,8 +62,9 @@ fn test_graph_iterator_linearized() { tx.discard(); } -#[test] -fn test_graph_iterator_virtual_octopus() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_virtual_octopus(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -90,7 +97,11 @@ fn test_graph_iterator_virtual_octopus() { mut_repo.as_repo_ref(), &[&commit_a, &commit_b, &commit_c, &commit_f], ); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 4); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); @@ -112,8 +123,9 @@ fn test_graph_iterator_virtual_octopus() { tx.discard(); } -#[test] -fn test_graph_iterator_simple_fork() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_simple_fork(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -143,7 +155,11 @@ fn test_graph_iterator_simple_fork() { let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_c, &commit_e]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 3); assert_eq!(commits[0].0.commit_id(), *commit_e.id()); @@ -156,15 +172,17 @@ fn test_graph_iterator_simple_fork() { tx.discard(); } -#[test] -fn test_graph_iterator_multiple_missing() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_multiple_missing(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); let mut_repo = tx.mut_repo(); // Tests that we get missing edges to "a" and "c" and not just one missing edge - // to the root. F + // to the root. + // F // / \ F // d e => /|\ // |\ /| ~ B ~ @@ -187,7 +205,11 @@ fn test_graph_iterator_multiple_missing() { let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_b, &commit_f]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 2); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); @@ -205,14 +227,17 @@ fn test_graph_iterator_multiple_missing() { tx.discard(); } -#[test] -fn test_graph_iterator_edge_to_ancestor() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_edge_to_ancestor(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); let mut_repo = tx.mut_repo(); - // Tests that we get both an edge from F to D and to D's ancestor C: + // Tests that we get both an edge from F to D and to D's ancestor C if we keep + // transitive edges and only the edge from F to D if we skip transitive + // edges: // F F // |\ |\ // D e D : @@ -235,19 +260,27 @@ fn test_graph_iterator_edge_to_ancestor() { let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_c, &commit_d, &commit_f]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 3); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); assert_eq!(commits[1].0.commit_id(), *commit_d.id()); assert_eq!(commits[2].0.commit_id(), *commit_c.id()); - assert_eq!( - commits[0].1, - hashset![ - RevsetGraphEdge::indirect(pos_c), - RevsetGraphEdge::direct(pos_d) - ] - ); + if skip_transitive_edges { + assert_eq!(commits[0].1, hashset![RevsetGraphEdge::direct(pos_d)]); + } else { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_c), + RevsetGraphEdge::direct(pos_d) + ] + ); + } assert_eq!( commits[1].1, hashset![ @@ -259,3 +292,96 @@ fn test_graph_iterator_edge_to_ancestor() { tx.discard(); } + +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_edge_escapes_from_(skip_transitive_edges: bool) { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests a more complex case for skipping transitive edges. + // J + // /| + // | i J + // | |\ /: + // | | H | H + // G | | G : + // | e f => : D + // | \|\ :/ + // | D | A + // \ / c | + // b / root + // |/ + // A + // | + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.commit_with_parents(&[&commit_a]); + let commit_c = graph_builder.commit_with_parents(&[&commit_a]); + let commit_d = graph_builder.commit_with_parents(&[&commit_b]); + let commit_e = graph_builder.commit_with_parents(&[&commit_d]); + let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_c]); + let commit_g = graph_builder.commit_with_parents(&[&commit_b]); + let commit_h = graph_builder.commit_with_parents(&[&commit_f]); + let commit_i = graph_builder.commit_with_parents(&[&commit_e, &commit_h]); + let commit_j = graph_builder.commit_with_parents(&[&commit_g, &commit_i]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap(); + let pos_g = mut_repo.index().commit_id_to_pos(commit_g.id()).unwrap(); + let pos_h = mut_repo.index().commit_id_to_pos(commit_h.id()).unwrap(); + + let revset = revset_for_commits( + mut_repo.as_repo_ref(), + &[&commit_a, &commit_d, &commit_g, &commit_h, &commit_j], + ); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); + drop(revset); + assert_eq!(commits.len(), 5); + assert_eq!(commits[0].0.commit_id(), *commit_j.id()); + assert_eq!(commits[1].0.commit_id(), *commit_h.id()); + assert_eq!(commits[2].0.commit_id(), *commit_g.id()); + assert_eq!(commits[3].0.commit_id(), *commit_d.id()); + assert_eq!(commits[4].0.commit_id(), *commit_a.id()); + if skip_transitive_edges { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_h), + RevsetGraphEdge::direct(pos_g) + ] + ); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::indirect(pos_d)]); + } else { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_h), + RevsetGraphEdge::direct(pos_g), + RevsetGraphEdge::indirect(pos_d), + ] + ); + assert_eq!( + commits[1].1, + hashset![ + RevsetGraphEdge::indirect(pos_d), + RevsetGraphEdge::indirect(pos_a) + ] + ); + } + assert_eq!(commits[2].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[3].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[4].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +}