2022-11-26 23:57:50 +00:00
|
|
|
// Copyright 2021 The Jujutsu Authors
|
2021-04-24 05:55:15 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2021-04-30 04:43:12 +00:00
|
|
|
use std::cmp::min;
|
2023-03-14 05:14:06 +00:00
|
|
|
use std::collections::{BTreeMap, HashSet};
|
2021-04-24 05:55:15 +00:00
|
|
|
|
2023-03-20 00:44:50 +00:00
|
|
|
use crate::backend::CommitId;
|
2023-03-01 05:50:04 +00:00
|
|
|
use crate::default_index_store::{IndexEntry, IndexPosition};
|
2022-02-21 05:14:48 +00:00
|
|
|
use crate::nightly_shims::BTreeMapExt;
|
2023-03-23 01:05:41 +00:00
|
|
|
use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType};
|
2021-04-24 05:55:15 +00:00
|
|
|
|
2023-02-16 18:54:53 +00:00
|
|
|
/// Given an iterator over some set of revisions, yields the same revisions with
|
|
|
|
/// associated edge types.
|
|
|
|
///
|
|
|
|
/// If a revision's parent is in the input set, then the edge will be "direct".
|
|
|
|
/// Otherwise, there will be one "indirect" edge for each closest ancestor in
|
|
|
|
/// the set, and one "missing" edge for each edge leading outside the set.
|
|
|
|
///
|
|
|
|
/// Example (uppercase characters are in the input set):
|
|
|
|
///
|
|
|
|
/// A A
|
|
|
|
/// |\ |\
|
|
|
|
/// B c B :
|
|
|
|
/// |\| => |\:
|
|
|
|
/// d E ~ E
|
|
|
|
/// |/ ~
|
|
|
|
/// root
|
|
|
|
///
|
|
|
|
/// The implementation works by walking the input iterator one commit at a
|
|
|
|
/// time. It then considers all parents of the commit. It looks ahead in the
|
|
|
|
/// input iterator far enough that all the parents will have been consumed if
|
|
|
|
/// they are in the input (and puts them away so we can emit them later). If a
|
|
|
|
/// parent of the current commit is not in the input set (i.e. it was not
|
|
|
|
/// in the look-ahead), we walk these external commits until we end up back
|
|
|
|
/// in the input set. That walk may result in consuming more elements from the
|
|
|
|
/// input iterator. In the example above, when we consider "A", we will
|
|
|
|
/// initially look ahead to "B" and "c". When we consider edges from the
|
|
|
|
/// external commit "c", we will further consume the input iterator to "E".
|
|
|
|
///
|
|
|
|
/// Missing edges are those that don't lead back into the input set. If all
|
|
|
|
/// edges from an external commit are missing, we consider the edge to that
|
|
|
|
/// commit to also be missing. In the example above, that means that "B" will
|
|
|
|
/// have a missing edge to "d" rather than to the root.
|
|
|
|
///
|
|
|
|
/// The iterator can be configured to skip transitive edges that it would
|
|
|
|
/// otherwise return. In this mode (which is the default), the edge from "A" to
|
|
|
|
/// "E" in the example above would be excluded because there's also a transitive
|
|
|
|
/// path from "A" to "E" via "B". The implementation of that mode
|
|
|
|
/// adds a filtering step just before yielding the edges for a commit. The
|
|
|
|
/// filtering works by doing a DFS in the simplified graph. That may require
|
|
|
|
/// even more look-ahead. Consider this example (uppercase characters are in the
|
|
|
|
/// input set):
|
|
|
|
///
|
|
|
|
/// J
|
|
|
|
/// /|
|
|
|
|
/// | i
|
|
|
|
/// | |\
|
|
|
|
/// | | H
|
|
|
|
/// G | |
|
|
|
|
/// | e f
|
|
|
|
/// | \|\
|
|
|
|
/// | D |
|
|
|
|
/// \ / c
|
|
|
|
/// b /
|
|
|
|
/// |/
|
|
|
|
/// A
|
|
|
|
/// |
|
|
|
|
/// root
|
|
|
|
///
|
|
|
|
/// When walking from "J", we'll find indirect edges to "H", "G", and "D". This
|
|
|
|
/// is our unfiltered set of edges, before removing transitive edges. In order
|
|
|
|
/// to know that "D" is an ancestor of "H", we need to also walk from "H". We
|
|
|
|
/// use the same search for finding edges from "H" as we used from "J". That
|
|
|
|
/// results in looking ahead all the way to "A". We could reduce the amount of
|
|
|
|
/// look-ahead by stopping at "c" since we're only interested in edges that
|
|
|
|
/// could lead to "D", but that would require extra book-keeping to remember for
|
|
|
|
/// later that the edges from "f" and "H" are only partially computed.
|
2023-02-15 17:33:44 +00:00
|
|
|
pub struct RevsetGraphIterator<'revset, 'index> {
|
2023-02-15 18:16:29 +00:00
|
|
|
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
|
2023-02-16 18:54:53 +00:00
|
|
|
/// Commits in the input set we had to take out of the iterator while
|
|
|
|
/// walking external edges. Does not necessarily include the commit
|
|
|
|
/// we're currently about to emit.
|
2023-02-15 17:33:44 +00:00
|
|
|
look_ahead: BTreeMap<IndexPosition, IndexEntry<'index>>,
|
2023-02-16 18:54:53 +00:00
|
|
|
/// The last consumed position. This is always the smallest key in the
|
|
|
|
/// look_ahead map, but it's faster to keep a separate field for it.
|
2021-04-24 05:55:15 +00:00
|
|
|
min_position: IndexPosition,
|
2023-02-16 18:54:53 +00:00
|
|
|
/// Edges for commits not in the input set.
|
2021-04-24 05:55:15 +00:00
|
|
|
// TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
|
2023-03-19 23:52:33 +00:00
|
|
|
edges: BTreeMap<IndexPosition, HashSet<(IndexPosition, RevsetGraphEdge)>>,
|
2021-04-30 04:43:12 +00:00
|
|
|
skip_transitive_edges: bool,
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
|
2023-03-23 01:05:41 +00:00
|
|
|
pub fn new(
|
|
|
|
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
|
|
|
|
) -> RevsetGraphIterator<'revset, 'index> {
|
2021-04-24 05:55:15 +00:00
|
|
|
RevsetGraphIterator {
|
2023-03-23 01:05:41 +00:00
|
|
|
input_set_iter,
|
2021-04-24 05:55:15 +00:00
|
|
|
look_ahead: Default::default(),
|
|
|
|
min_position: IndexPosition::MAX,
|
2021-04-30 04:43:12 +00:00
|
|
|
edges: Default::default(),
|
|
|
|
skip_transitive_edges: true,
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-30 04:43:12 +00:00
|
|
|
pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self {
|
|
|
|
self.skip_transitive_edges = skip_transitive_edges;
|
|
|
|
self
|
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
/// Returns the next entry of the input set to emit.
///
/// Entries that were buffered in `look_ahead` are handed out first (via
/// `pop_last_value()`); only when the buffer is empty do we pull a fresh
/// entry from the underlying iterator.
fn next_index_entry(&mut self) -> Option<IndexEntry<'index>> {
    self.look_ahead
        .pop_last_value()
        .or_else(|| self.input_set_iter.next())
}
|
|
|
|
|
|
|
|
fn edges_from_internal_commit(
|
|
|
|
&mut self,
|
2023-02-15 17:33:44 +00:00
|
|
|
index_entry: &IndexEntry<'index>,
|
2023-03-19 23:52:33 +00:00
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-30 04:43:12 +00:00
|
|
|
if let Some(edges) = self.edges.get(&index_entry.position()) {
|
|
|
|
return edges.clone();
|
|
|
|
}
|
2021-04-24 05:55:15 +00:00
|
|
|
let mut edges = HashSet::new();
|
|
|
|
for parent in index_entry.parents() {
|
|
|
|
let parent_position = parent.position();
|
2023-03-19 23:52:33 +00:00
|
|
|
let parent_commit_id = parent.commit_id();
|
2021-04-24 05:55:15 +00:00
|
|
|
self.consume_to(parent_position);
|
|
|
|
if self.look_ahead.contains_key(&parent_position) {
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((parent_position, RevsetGraphEdge::direct(parent_commit_id)));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
let parent_edges = self.edges_from_external_commit(parent);
|
|
|
|
if parent_edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
|
2021-04-24 05:55:15 +00:00
|
|
|
{
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((parent_position, RevsetGraphEdge::missing(parent_commit_id)));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
edges.extend(parent_edges);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.insert(index_entry.position(), edges.clone());
|
2021-04-24 05:55:15 +00:00
|
|
|
edges
|
|
|
|
}
|
|
|
|
|
|
|
|
fn edges_from_external_commit(
|
|
|
|
&mut self,
|
2023-02-15 17:33:44 +00:00
|
|
|
index_entry: IndexEntry<'index>,
|
2023-03-19 23:52:33 +00:00
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-24 05:55:15 +00:00
|
|
|
let position = index_entry.position();
|
|
|
|
let mut stack = vec![index_entry];
|
|
|
|
while let Some(entry) = stack.last() {
|
|
|
|
let position = entry.position();
|
|
|
|
let mut edges = HashSet::new();
|
|
|
|
let mut parents_complete = true;
|
|
|
|
for parent in entry.parents() {
|
|
|
|
let parent_position = parent.position();
|
|
|
|
self.consume_to(parent_position);
|
|
|
|
if self.look_ahead.contains_key(&parent_position) {
|
|
|
|
// We have found a path back into the input set
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::indirect(parent.commit_id()),
|
|
|
|
));
|
2021-04-30 04:43:12 +00:00
|
|
|
} else if let Some(parent_edges) = self.edges.get(&parent_position) {
|
2021-04-24 05:55:15 +00:00
|
|
|
if parent_edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
|
2021-04-24 05:55:15 +00:00
|
|
|
{
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::missing(parent.commit_id()),
|
|
|
|
));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
edges.extend(parent_edges.iter().cloned());
|
|
|
|
}
|
|
|
|
} else if parent_position < self.min_position {
|
|
|
|
// The parent is not in the input set
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::missing(parent.commit_id()),
|
|
|
|
));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
// The parent is not in the input set but it's somewhere in the range
|
|
|
|
// where we have commits in the input set, so continue searching.
|
|
|
|
stack.push(parent);
|
|
|
|
parents_complete = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if parents_complete {
|
|
|
|
stack.pop().unwrap();
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.insert(position, edges);
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.get(&position).unwrap().clone()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn remove_transitive_edges(
|
|
|
|
&mut self,
|
2023-03-19 23:52:33 +00:00
|
|
|
edges: HashSet<(IndexPosition, RevsetGraphEdge)>,
|
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-30 04:43:12 +00:00
|
|
|
if !edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.any(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Indirect)
|
2021-04-30 04:43:12 +00:00
|
|
|
{
|
|
|
|
return edges;
|
|
|
|
}
|
|
|
|
let mut min_generation = u32::MAX;
|
|
|
|
let mut initial_targets = HashSet::new();
|
|
|
|
let mut work = vec![];
|
|
|
|
// To start with, add the edges one step after the input edges.
|
2023-03-19 23:52:33 +00:00
|
|
|
for (target, edge) in &edges {
|
|
|
|
initial_targets.insert(target);
|
2021-04-30 04:43:12 +00:00
|
|
|
if edge.edge_type != RevsetGraphEdgeType::Missing {
|
2023-03-19 23:52:33 +00:00
|
|
|
let entry = self.look_ahead.get(target).unwrap().clone();
|
2021-04-30 04:43:12 +00:00
|
|
|
min_generation = min(min_generation, entry.generation_number());
|
|
|
|
work.extend(self.edges_from_internal_commit(&entry));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Find commits reachable transitively and add them to the `unwanted` set.
|
|
|
|
let mut unwanted = HashSet::new();
|
2023-03-19 23:52:33 +00:00
|
|
|
while let Some((target, edge)) = work.pop() {
|
|
|
|
if edge.edge_type == RevsetGraphEdgeType::Missing || target < self.min_position {
|
2021-04-30 04:43:12 +00:00
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
if !unwanted.insert(target) {
|
2021-04-30 04:43:12 +00:00
|
|
|
// Already visited
|
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
if initial_targets.contains(&target) {
|
2021-04-30 04:43:12 +00:00
|
|
|
// Already visited
|
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
let entry = self.look_ahead.get(&target).unwrap().clone();
|
2021-04-30 04:43:12 +00:00
|
|
|
if entry.generation_number() < min_generation {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
work.extend(self.edges_from_internal_commit(&entry));
|
|
|
|
}
|
|
|
|
|
|
|
|
edges
|
|
|
|
.into_iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.filter(|(target, _)| !unwanted.contains(target))
|
2021-04-30 04:43:12 +00:00
|
|
|
.collect()
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Pulls entries from the input iterator into `look_ahead` until the most
/// recently pulled position is no longer above `pos`, or until the input
/// is exhausted. `min_position` is updated to each pulled position.
fn consume_to(&mut self, pos: IndexPosition) {
    while pos < self.min_position {
        match self.input_set_iter.next() {
            None => break,
            Some(entry) => {
                let entry_position = entry.position();
                self.min_position = entry_position;
                self.look_ahead.insert(entry_position, entry);
            }
        }
    }
}
|
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> {
|
2023-03-20 00:44:50 +00:00
|
|
|
type Item = (CommitId, Vec<RevsetGraphEdge>);
|
2021-04-24 05:55:15 +00:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2022-04-23 01:01:49 +00:00
|
|
|
let index_entry = self.next_index_entry()?;
|
|
|
|
let mut edges = self.edges_from_internal_commit(&index_entry);
|
|
|
|
if self.skip_transitive_edges {
|
|
|
|
edges = self.remove_transitive_edges(edges);
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
2022-04-23 01:01:49 +00:00
|
|
|
let mut edges: Vec<_> = edges.into_iter().collect();
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.sort_by(|(target_pos1, _), (target_pos2, _)| target_pos2.cmp(target_pos1));
|
|
|
|
let edges = edges.into_iter().map(|(_, edge)| edge).collect();
|
2023-03-20 00:44:50 +00:00
|
|
|
Some((index_entry.commit_id(), edges))
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|