// Copyright 2021 The Jujutsu Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::cmp::min; use std::collections::{BTreeMap, HashSet}; use crate::backend::CommitId; use crate::default_index_store::{IndexEntry, IndexPosition}; use crate::nightly_shims::BTreeMapExt; use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType}; /// Given an iterator over some set of revisions, yields the same revisions with /// associated edge types. /// /// If a revision's parent is in the input set, then the edge will be "direct". /// Otherwise, there will be one "indirect" edge for each closest ancestor in /// the set, and one "missing" edge for each edge leading outside the set. /// /// Example (uppercase characters are in the input set): /// /// A A /// |\ |\ /// B c B : /// |\| => |\: /// d E ~ E /// |/ ~ /// root /// /// The implementation works by walking the input iterator one commit at a /// time. It then considers all parents of the commit. It looks ahead in the /// input iterator far enough that all the parents will have been consumed if /// they are in the input (and puts them away so we can emit them later). If a /// parent of the current commit is not in the input set (i.e. it was not /// in the look-ahead), we walk these external commits until we end up back back /// in the input set. That walk may result in consuming more elements from the /// input iterator. In the example above, when we consider "A", we will /// initially look ahead to "B" and "c". When we consider edges from the /// external commit "c", we will further consume the input iterator to "E". /// /// Missing edges are those that don't lead back into the input set. If all /// edges from an external commit are missing, we consider the edge to that /// commit to also be missing. In the example above, that means that "B" will /// have a missing edge to "d" rather than to the root. /// /// The iterator can be configured to skip transitive edges that it would /// otherwise return. In this mode (which is the default), the edge from "A" to /// "E" in the example above would be excluded because there's also a transitive /// path from "A" to "E" via "B". The implementation of that mode /// adds a filtering step just before yielding the edges for a commit. The /// filtering works by doing a DFS in the simplified graph. That may require /// even more look-ahead. Consider this example (uppercase characters are in the /// input set): /// /// J /// /| /// | i /// | |\ /// | | H /// G | | /// | e f /// | \|\ /// | D | /// \ / c /// b / /// |/ /// A /// | /// root /// /// When walking from "J", we'll find indirect edges to "H", "G", and "D". This /// is our unfiltered set of edges, before removing transitive edges. In order /// to know that "D" is an ancestor of "H", we need to also walk from "H". We /// use the same search for finding edges from "H" as we used from "J". That /// results in looking ahead all the way to "A". We could reduce the amount of /// look-ahead by stopping at "c" since we're only interested in edges that /// could lead to "D", but that would require extra book-keeping to remember for /// later that the edges from "f" and "H" are only partially computed. pub struct RevsetGraphIterator<'revset, 'index> { input_set_iter: Box> + 'revset>, /// Commits in the input set we had to take out of the iterator while /// walking external edges. Does not necessarily include the commit /// we're currently about to emit. look_ahead: BTreeMap>, /// The last consumed position. This is always the smallest key in the /// look_ahead map, but it's faster to keep a separate field for it. min_position: IndexPosition, /// Edges for commits not in the input set. // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? edges: BTreeMap>, skip_transitive_edges: bool, } impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { pub fn new( input_set_iter: Box> + 'revset>, ) -> RevsetGraphIterator<'revset, 'index> { RevsetGraphIterator { input_set_iter, look_ahead: Default::default(), min_position: IndexPosition::MAX, edges: Default::default(), skip_transitive_edges: true, } } pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self { self.skip_transitive_edges = skip_transitive_edges; self } fn next_index_entry(&mut self) -> Option> { if let Some(index_entry) = self.look_ahead.pop_last_value() { return Some(index_entry); } self.input_set_iter.next() } fn edges_from_internal_commit( &mut self, index_entry: &IndexEntry<'index>, ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { if let Some(edges) = self.edges.get(&index_entry.position()) { return edges.clone(); } let mut edges = HashSet::new(); for parent in index_entry.parents() { let parent_position = parent.position(); let parent_commit_id = parent.commit_id(); self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { edges.insert((parent_position, RevsetGraphEdge::direct(parent_commit_id))); } else { let parent_edges = self.edges_from_external_commit(parent); if parent_edges .iter() .all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing) { edges.insert((parent_position, RevsetGraphEdge::missing(parent_commit_id))); } else { edges.extend(parent_edges); } } } self.edges.insert(index_entry.position(), edges.clone()); edges } fn edges_from_external_commit( &mut self, index_entry: IndexEntry<'index>, ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { let position = index_entry.position(); let mut stack = vec![index_entry]; while let Some(entry) = stack.last() { let position = entry.position(); let mut edges = HashSet::new(); let mut parents_complete = true; for parent in entry.parents() { let parent_position = parent.position(); self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { // We have found a path back into the input set edges.insert(( parent_position, RevsetGraphEdge::indirect(parent.commit_id()), )); } else if let Some(parent_edges) = self.edges.get(&parent_position) { if parent_edges .iter() .all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing) { edges.insert(( parent_position, RevsetGraphEdge::missing(parent.commit_id()), )); } else { edges.extend(parent_edges.iter().cloned()); } } else if parent_position < self.min_position { // The parent is not in the input set edges.insert(( parent_position, RevsetGraphEdge::missing(parent.commit_id()), )); } else { // The parent is not in the input set but it's somewhere in the range // where we have commits in the input set, so continue searching. stack.push(parent); parents_complete = false; } } if parents_complete { stack.pop().unwrap(); self.edges.insert(position, edges); } } self.edges.get(&position).unwrap().clone() } fn remove_transitive_edges( &mut self, edges: HashSet<(IndexPosition, RevsetGraphEdge)>, ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { if !edges .iter() .any(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Indirect) { return edges; } let mut min_generation = u32::MAX; let mut initial_targets = HashSet::new(); let mut work = vec![]; // To start with, add the edges one step after the input edges. for (target, edge) in &edges { initial_targets.insert(target); if edge.edge_type != RevsetGraphEdgeType::Missing { let entry = self.look_ahead.get(target).unwrap().clone(); min_generation = min(min_generation, entry.generation_number()); work.extend(self.edges_from_internal_commit(&entry)); } } // Find commits reachable transitively and add them to the `unwanted` set. let mut unwanted = HashSet::new(); while let Some((target, edge)) = work.pop() { if edge.edge_type == RevsetGraphEdgeType::Missing || target < self.min_position { continue; } if !unwanted.insert(target) { // Already visited continue; } if initial_targets.contains(&target) { // Already visited continue; } let entry = self.look_ahead.get(&target).unwrap().clone(); if entry.generation_number() < min_generation { continue; } work.extend(self.edges_from_internal_commit(&entry)); } edges .into_iter() .filter(|(target, _)| !unwanted.contains(target)) .collect() } fn consume_to(&mut self, pos: IndexPosition) { while pos < self.min_position { if let Some(next_entry) = self.input_set_iter.next() { let next_position = next_entry.position(); self.look_ahead.insert(next_position, next_entry); self.min_position = next_position; } else { break; } } } } impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> { type Item = (CommitId, Vec); fn next(&mut self) -> Option { let index_entry = self.next_index_entry()?; let mut edges = self.edges_from_internal_commit(&index_entry); if self.skip_transitive_edges { edges = self.remove_transitive_edges(edges); } let mut edges: Vec<_> = edges.into_iter().collect(); edges.sort_by(|(target_pos1, _), (target_pos2, _)| target_pos2.cmp(target_pos1)); let edges = edges.into_iter().map(|(_, edge)| edge).collect(); Some((index_entry.commit_id(), edges)) } }