// Copyright 2021 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::cmp::min; use std::collections::{BTreeMap, HashMap, HashSet}; use crate::index::{IndexEntry, IndexPosition}; use crate::nightly_shims::BTreeMapExt; use crate::revset::RevsetIterator; #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub struct RevsetGraphEdge { pub target: IndexPosition, pub edge_type: RevsetGraphEdgeType, } impl RevsetGraphEdge { pub fn missing(target: IndexPosition) -> Self { Self { target, edge_type: RevsetGraphEdgeType::Missing, } } pub fn direct(target: IndexPosition) -> Self { Self { target, edge_type: RevsetGraphEdgeType::Direct, } } pub fn indirect(target: IndexPosition) -> Self { Self { target, edge_type: RevsetGraphEdgeType::Indirect, } } } #[derive(Debug, PartialEq, Eq, Clone, Hash)] pub enum RevsetGraphEdgeType { Missing, Direct, Indirect, } // Given an iterator over some set of revisions, yields the same revisions with // associated edge types. // // If a revision's parent is in the input set, then the edge will be "direct". // Otherwise, there will be one "indirect" edge for each closest ancestor in the // set, and one "missing" edge for each edge leading outside the set. // // Example (uppercase characters are in the input set): // // A A // |\ |\ // B c B : // |\| => |\: // d E ~ E // |/ ~ // root // // The implementation works by walking the input iterator in one commit at a // time. It then considers all parents of the commit. It looks ahead in the // input iterator far enough that all the parents will have been consumed if // they are in the input (and puts them away so we can emit them later). If a // parent of the current commit is not in the input set (i.e. it was not // in the look-ahead), we walk these external commits until we end up back back // in the input set. That walk may result in consuming more elements from the // input iterator. In the example above, when we consider "A", we will initially // look ahead to "B" and "c". When we consider edges from the external commit // "c", we will further consume the input iterator to "E". // // Missing edges are those that don't lead back into the input set. If all edges // from an external commit are missing, we consider the edge to that edge to // also be missing. In the example above, that means that "B" will have a // missing edge to "d" rather than to the root. // // The iterator can be configured to skip transitive edges that it would // otherwise return. In this mode (which is the default), the edge from "A" to // "E" in the example above would be excluded because there's also a transitive // path from "A" to "E" via "B". The implementation of that mode // adds a filtering step just before yielding the edges for a commit. The // filtering works doing a DFS in the simplified graph. That may require even // more look-ahead. Consider this example (uppercase characters are in the input // set): // // J // /| // | i // | |\ // | | H // G | | // | e f // | \|\ // | D | // \ / c // b / // |/ // A // | // root // // When walking from "J", we'll find indirect edges to "H", "G", and "D". This // is our unfiltered set of edges, before removing transitive edges. In order to // know that "D" is an ancestor of "H", we need to also walk from "H". We use // the same search for finding edges from "H" as we used from "J". That results // in looking ahead all the way to "A". We could reduce the amount of look-ahead // by stopping at "c" since we're only interested in edges that could lead to // "D", but that would require extra book-keeping to remember for later that the // edges from "f" and "H" are only partially computed. pub struct RevsetGraphIterator<'revset, 'repo> { input_set_iter: RevsetIterator<'revset, 'repo>, // Commits in the input set we had to take out of the iterator while walking external // edges. Does not necessarily include the commit we're currently about to emit. look_ahead: BTreeMap>, // The last consumed position. This is always the smallest key in the look_ahead map, but it's // faster to keep a separate field for it. min_position: IndexPosition, // Edges for commits not in the input set. // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? edges: BTreeMap>, skip_transitive_edges: bool, } impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { pub fn new(iter: RevsetIterator<'revset, 'repo>) -> RevsetGraphIterator<'revset, 'repo> { RevsetGraphIterator { input_set_iter: iter, look_ahead: Default::default(), min_position: IndexPosition::MAX, edges: Default::default(), skip_transitive_edges: true, } } pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self { self.skip_transitive_edges = skip_transitive_edges; self } pub fn reversed(self) -> ReverseRevsetGraphIterator<'repo> { ReverseRevsetGraphIterator::new(self) } fn next_index_entry(&mut self) -> Option> { if let Some(index_entry) = self.look_ahead.pop_last_value() { return Some(index_entry); } self.input_set_iter.next() } fn edges_from_internal_commit( &mut self, index_entry: &IndexEntry<'repo>, ) -> HashSet { if let Some(edges) = self.edges.get(&index_entry.position()) { return edges.clone(); } let mut edges = HashSet::new(); for parent in index_entry.parents() { let parent_position = parent.position(); self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { edges.insert(RevsetGraphEdge::direct(parent_position)); } else { let parent_edges = self.edges_from_external_commit(parent); if parent_edges .iter() .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) { edges.insert(RevsetGraphEdge::missing(parent_position)); } else { edges.extend(parent_edges); } } } self.edges.insert(index_entry.position(), edges.clone()); edges } fn edges_from_external_commit( &mut self, index_entry: IndexEntry<'repo>, ) -> HashSet { let position = index_entry.position(); let mut stack = vec![index_entry]; while let Some(entry) = stack.last() { let position = entry.position(); let mut edges = HashSet::new(); let mut parents_complete = true; for parent in entry.parents() { let parent_position = parent.position(); self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { // We have found a path back into the input set edges.insert(RevsetGraphEdge::indirect(parent_position)); } else if let Some(parent_edges) = self.edges.get(&parent_position) { if parent_edges .iter() .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) { edges.insert(RevsetGraphEdge::missing(parent_position)); } else { edges.extend(parent_edges.iter().cloned()); } } else if parent_position < self.min_position { // The parent is not in the input set edges.insert(RevsetGraphEdge::missing(parent_position)); } else { // The parent is not in the input set but it's somewhere in the range // where we have commits in the input set, so continue searching. stack.push(parent); parents_complete = false; } } if parents_complete { stack.pop().unwrap(); self.edges.insert(position, edges); } } self.edges.get(&position).unwrap().clone() } fn remove_transitive_edges( &mut self, edges: HashSet, ) -> HashSet { if !edges .iter() .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect) { return edges; } let mut min_generation = u32::MAX; let mut initial_targets = HashSet::new(); let mut work = vec![]; // To start with, add the edges one step after the input edges. for edge in &edges { initial_targets.insert(edge.target); if edge.edge_type != RevsetGraphEdgeType::Missing { let entry = self.look_ahead.get(&edge.target).unwrap().clone(); min_generation = min(min_generation, entry.generation_number()); work.extend(self.edges_from_internal_commit(&entry)); } } // Find commits reachable transitively and add them to the `unwanted` set. let mut unwanted = HashSet::new(); while let Some(edge) = work.pop() { if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position { continue; } if !unwanted.insert(edge.target) { // Already visited continue; } if initial_targets.contains(&edge.target) { // Already visited continue; } let entry = self.look_ahead.get(&edge.target).unwrap().clone(); if entry.generation_number() < min_generation { continue; } work.extend(self.edges_from_internal_commit(&entry)); } edges .into_iter() .filter(|edge| !unwanted.contains(&edge.target)) .collect() } fn consume_to(&mut self, pos: IndexPosition) { while pos < self.min_position { if let Some(next_entry) = self.input_set_iter.next() { let next_position = next_entry.position(); self.look_ahead.insert(next_position, next_entry); self.min_position = next_position; } else { break; } } } } impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> { type Item = (IndexEntry<'repo>, Vec); fn next(&mut self) -> Option { let index_entry = self.next_index_entry()?; let mut edges = self.edges_from_internal_commit(&index_entry); if self.skip_transitive_edges { edges = self.remove_transitive_edges(edges); } let mut edges: Vec<_> = edges.into_iter().collect(); edges.sort_by(|edge1, edge2| edge2.target.cmp(&edge1.target)); Some((index_entry, edges)) } } pub struct ReverseRevsetGraphIterator<'repo> { items: Vec<(IndexEntry<'repo>, Vec)>, } impl<'repo> ReverseRevsetGraphIterator<'repo> { fn new<'revset>(input: RevsetGraphIterator<'revset, 'repo>) -> Self { let mut entries = vec![]; let mut reverse_edges: HashMap> = HashMap::new(); for (entry, edges) in input { for RevsetGraphEdge { target, edge_type } in edges { reverse_edges .entry(target) .or_default() .push(RevsetGraphEdge { target: entry.position(), edge_type, }) } entries.push(entry); } let mut items = vec![]; for entry in entries.into_iter() { let edges = reverse_edges .get(&entry.position()) .cloned() .unwrap_or_default(); items.push((entry, edges)); } Self { items } } } impl<'repo> Iterator for ReverseRevsetGraphIterator<'repo> { type Item = (IndexEntry<'repo>, Vec); fn next(&mut self) -> Option { self.items.pop() } }