diff --git a/lib/src/revset_graph_iterator.rs b/lib/src/revset_graph_iterator.rs index b14e45d5f..11874bdf0 100644 --- a/lib/src/revset_graph_iterator.rs +++ b/lib/src/revset_graph_iterator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::cmp::min; use std::collections::{BTreeMap, HashSet}; use crate::index::{IndexEntry, IndexPosition}; @@ -83,6 +84,40 @@ pub enum RevsetGraphEdgeType { // from an external commit are missing, we consider the edge to that edge to // also be missing. In the example above, that means that "B" will have a // missing edge to "d" rather than to the root. +// +// The iterator can be configured to skip transitive edges that it would +// otherwise return. In this mode (which is the default), the edge from "A" to +// "E" in the example above would be excluded because there's also a transitive +// path from "A" to "E" via "B". The implementation of that mode +// adds a filtering step just before yielding the edges for a commit. The +// filtering works doing a DFS in the simplified graph. That may require even +// more look-ahead. Consider this example (uppercase characters are in the input +// set): +// +// J +// /| +// | i +// | |\ +// | | H +// G | | +// | e f +// | \|\ +// | D | +// \ / c +// b / +// |/ +// A +// | +// root +// +// When walking from "J", we'll find indirect edges to "H", "G", and "D". This +// is our unfiltered set of edges, before removing transitive edges. In order to +// know that "D" is an ancestor of "H", we need to also walk from "H". We use +// the same search for finding edges from "H" as we used from "J". That results +// in looking ahead all the way to "A". We could reduce the amount of look-ahead +// by stopping at "c" since we're only interested in edges that could lead to +// "D", but that would require extra book-keeping to remember for later that the +// edges from "f" and "H" are only partially computed. pub struct RevsetGraphIterator<'revset, 'repo> { input_set_iter: RevsetIterator<'revset, 'repo>, // Commits in the input set we had to take out of the iterator while walking external @@ -93,7 +128,8 @@ pub struct RevsetGraphIterator<'revset, 'repo> { min_position: IndexPosition, // Edges for commits not in the input set. // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? - external_commits: BTreeMap>, + edges: BTreeMap>, + skip_transitive_edges: bool, } impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { @@ -102,10 +138,16 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { input_set_iter: iter, look_ahead: Default::default(), min_position: IndexPosition::MAX, - external_commits: Default::default(), + edges: Default::default(), + skip_transitive_edges: true, } } + pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self { + self.skip_transitive_edges = skip_transitive_edges; + self + } + fn next_index_entry(&mut self) -> Option> { if let Some((_, index_entry)) = self.look_ahead.pop_last() { return Some(index_entry); @@ -117,6 +159,9 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { &mut self, index_entry: &IndexEntry<'repo>, ) -> HashSet { + if let Some(edges) = self.edges.get(&index_entry.position()) { + return edges.clone(); + } let mut edges = HashSet::new(); for parent in index_entry.parents() { let parent_position = parent.position(); @@ -135,6 +180,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { } } } + self.edges.insert(index_entry.position(), edges.clone()); edges } @@ -154,7 +200,7 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { if self.look_ahead.contains_key(&parent_position) { // We have found a path back into the input set edges.insert(RevsetGraphEdge::indirect(parent_position)); - } else if let Some(parent_edges) = self.external_commits.get(&parent_position) { + } else if let Some(parent_edges) = self.edges.get(&parent_position) { if parent_edges .iter() .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) @@ -175,10 +221,59 @@ impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { } if parents_complete { stack.pop().unwrap(); - self.external_commits.insert(position, edges); + self.edges.insert(position, edges); } } - self.external_commits.get(&position).unwrap().clone() + self.edges.get(&position).unwrap().clone() + } + + fn remove_transitive_edges( + &mut self, + edges: HashSet, + ) -> HashSet { + if !edges + .iter() + .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect) + { + return edges; + } + let mut min_generation = u32::MAX; + let mut initial_targets = HashSet::new(); + let mut work = vec![]; + // To start with, add the edges one step after the input edges. + for edge in &edges { + initial_targets.insert(edge.target); + if edge.edge_type != RevsetGraphEdgeType::Missing { + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); + min_generation = min(min_generation, entry.generation_number()); + work.extend(self.edges_from_internal_commit(&entry)); + } + } + // Find commits reachable transitively and add them to the `unwanted` set. + let mut unwanted = HashSet::new(); + while let Some(edge) = work.pop() { + if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position { + continue; + } + if !unwanted.insert(edge.target) { + // Already visited + continue; + } + if initial_targets.contains(&edge.target) { + // Already visited + continue; + } + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); + if entry.generation_number() < min_generation { + continue; + } + work.extend(self.edges_from_internal_commit(&entry)); + } + + edges + .into_iter() + .filter(|edge| !unwanted.contains(&edge.target)) + .collect() } fn consume_to(&mut self, pos: IndexPosition) { @@ -199,7 +294,10 @@ impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> { fn next(&mut self) -> Option { if let Some(index_entry) = self.next_index_entry() { - let edges = self.edges_from_internal_commit(&index_entry); + let mut edges = self.edges_from_internal_commit(&index_entry); + if self.skip_transitive_edges { + edges = self.remove_transitive_edges(edges); + } Some((index_entry, edges)) } else { None diff --git a/lib/tests/test_revset_graph_iterator.rs b/lib/tests/test_revset_graph_iterator.rs index fa0f05753..c22b52e9c 100644 --- a/lib/tests/test_revset_graph_iterator.rs +++ b/lib/tests/test_revset_graph_iterator.rs @@ -17,9 +17,11 @@ use jujube_lib::revset_graph_iterator::RevsetGraphEdge; use jujube_lib::testutils; use jujube_lib::testutils::CommitGraphBuilder; use maplit::hashset; +use test_case::test_case; -#[test] -fn test_graph_iterator_linearized() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_linearized(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -45,7 +47,11 @@ fn test_graph_iterator_linearized() { let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_d]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 2); assert_eq!(commits[0].0.commit_id(), *commit_d.id()); @@ -56,8 +62,9 @@ fn test_graph_iterator_linearized() { tx.discard(); } -#[test] -fn test_graph_iterator_virtual_octopus() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_virtual_octopus(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -90,7 +97,11 @@ fn test_graph_iterator_virtual_octopus() { mut_repo.as_repo_ref(), &[&commit_a, &commit_b, &commit_c, &commit_f], ); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 4); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); @@ -112,8 +123,9 @@ fn test_graph_iterator_virtual_octopus() { tx.discard(); } -#[test] -fn test_graph_iterator_simple_fork() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_simple_fork(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); @@ -143,7 +155,11 @@ fn test_graph_iterator_simple_fork() { let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_c, &commit_e]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 3); assert_eq!(commits[0].0.commit_id(), *commit_e.id()); @@ -156,15 +172,17 @@ fn test_graph_iterator_simple_fork() { tx.discard(); } -#[test] -fn test_graph_iterator_multiple_missing() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_multiple_missing(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); let mut_repo = tx.mut_repo(); // Tests that we get missing edges to "a" and "c" and not just one missing edge - // to the root. F + // to the root. + // F // / \ F // d e => /|\ // |\ /| ~ B ~ @@ -187,7 +205,11 @@ fn test_graph_iterator_multiple_missing() { let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_b, &commit_f]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 2); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); @@ -205,14 +227,17 @@ fn test_graph_iterator_multiple_missing() { tx.discard(); } -#[test] -fn test_graph_iterator_edge_to_ancestor() { +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_edge_to_ancestor(skip_transitive_edges: bool) { let settings = testutils::user_settings(); let (_temp_dir, repo) = testutils::init_repo(&settings, true); let mut tx = repo.start_transaction("test"); let mut_repo = tx.mut_repo(); - // Tests that we get both an edge from F to D and to D's ancestor C: + // Tests that we get both an edge from F to D and to D's ancestor C if we keep + // transitive edges and only the edge from F to D if we skip transitive + // edges: // F F // |\ |\ // D e D : @@ -235,19 +260,27 @@ fn test_graph_iterator_edge_to_ancestor() { let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap(); let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_c, &commit_d, &commit_f]); - let commits: Vec<_> = revset.iter().graph().collect(); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); drop(revset); assert_eq!(commits.len(), 3); assert_eq!(commits[0].0.commit_id(), *commit_f.id()); assert_eq!(commits[1].0.commit_id(), *commit_d.id()); assert_eq!(commits[2].0.commit_id(), *commit_c.id()); - assert_eq!( - commits[0].1, - hashset![ - RevsetGraphEdge::indirect(pos_c), - RevsetGraphEdge::direct(pos_d) - ] - ); + if skip_transitive_edges { + assert_eq!(commits[0].1, hashset![RevsetGraphEdge::direct(pos_d)]); + } else { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_c), + RevsetGraphEdge::direct(pos_d) + ] + ); + } assert_eq!( commits[1].1, hashset![ @@ -259,3 +292,96 @@ fn test_graph_iterator_edge_to_ancestor() { tx.discard(); } + +#[test_case(false ; "keep transitive edges")] +#[test_case(true ; "skip transitive edges")] +fn test_graph_iterator_edge_escapes_from_(skip_transitive_edges: bool) { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests a more complex case for skipping transitive edges. + // J + // /| + // | i J + // | |\ /: + // | | H | H + // G | | G : + // | e f => : D + // | \|\ :/ + // | D | A + // \ / c | + // b / root + // |/ + // A + // | + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.commit_with_parents(&[&commit_a]); + let commit_c = graph_builder.commit_with_parents(&[&commit_a]); + let commit_d = graph_builder.commit_with_parents(&[&commit_b]); + let commit_e = graph_builder.commit_with_parents(&[&commit_d]); + let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_c]); + let commit_g = graph_builder.commit_with_parents(&[&commit_b]); + let commit_h = graph_builder.commit_with_parents(&[&commit_f]); + let commit_i = graph_builder.commit_with_parents(&[&commit_e, &commit_h]); + let commit_j = graph_builder.commit_with_parents(&[&commit_g, &commit_i]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap(); + let pos_g = mut_repo.index().commit_id_to_pos(commit_g.id()).unwrap(); + let pos_h = mut_repo.index().commit_id_to_pos(commit_h.id()).unwrap(); + + let revset = revset_for_commits( + mut_repo.as_repo_ref(), + &[&commit_a, &commit_d, &commit_g, &commit_h, &commit_j], + ); + let commits: Vec<_> = revset + .iter() + .graph() + .set_skip_transitive_edges(skip_transitive_edges) + .collect(); + drop(revset); + assert_eq!(commits.len(), 5); + assert_eq!(commits[0].0.commit_id(), *commit_j.id()); + assert_eq!(commits[1].0.commit_id(), *commit_h.id()); + assert_eq!(commits[2].0.commit_id(), *commit_g.id()); + assert_eq!(commits[3].0.commit_id(), *commit_d.id()); + assert_eq!(commits[4].0.commit_id(), *commit_a.id()); + if skip_transitive_edges { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_h), + RevsetGraphEdge::direct(pos_g) + ] + ); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::indirect(pos_d)]); + } else { + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_h), + RevsetGraphEdge::direct(pos_g), + RevsetGraphEdge::indirect(pos_d), + ] + ); + assert_eq!( + commits[1].1, + hashset![ + RevsetGraphEdge::indirect(pos_d), + RevsetGraphEdge::indirect(pos_a) + ] + ); + } + assert_eq!(commits[2].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[3].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[4].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +}