2022-11-26 23:57:50 +00:00
|
|
|
// Copyright 2021 The Jujutsu Authors
|
2021-04-24 05:55:15 +00:00
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// https://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2021-04-30 04:43:12 +00:00
|
|
|
use std::cmp::min;
|
2023-03-14 05:14:06 +00:00
|
|
|
use std::collections::{BTreeMap, HashSet};
|
2021-04-24 05:55:15 +00:00
|
|
|
|
2023-03-20 00:44:50 +00:00
|
|
|
use crate::backend::CommitId;
|
2023-03-01 05:50:04 +00:00
|
|
|
use crate::default_index_store::{IndexEntry, IndexPosition};
|
2022-02-21 05:14:48 +00:00
|
|
|
use crate::nightly_shims::BTreeMapExt;
|
2023-03-23 01:05:41 +00:00
|
|
|
use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType};
|
2021-04-24 05:55:15 +00:00
|
|
|
|
|
|
|
// Given an iterator over some set of revisions, yields the same revisions with
|
|
|
|
// associated edge types.
|
|
|
|
//
|
|
|
|
// If a revision's parent is in the input set, then the edge will be "direct".
|
|
|
|
// Otherwise, there will be one "indirect" edge for each closest ancestor in the
|
|
|
|
// set, and one "missing" edge for each edge leading outside the set.
|
|
|
|
//
|
|
|
|
// Example (uppercase characters are in the input set):
|
|
|
|
//
|
|
|
|
//  A          A
//  |\         |\
//  B c        B :
//  |\|     => |\:
//  d E        ~ E
//  |/           ~
//  root
|
|
|
|
//
|
|
|
|
// The implementation works by walking the input iterator one commit at a
|
|
|
|
// time. It then considers all parents of the commit. It looks ahead in the
|
|
|
|
// input iterator far enough that all the parents will have been consumed if
|
|
|
|
// they are in the input (and puts them away so we can emit them later). If a
|
|
|
|
// parent of the current commit is not in the input set (i.e. it was not
|
|
|
|
// in the look-ahead), we walk these external commits until we end up back
|
|
|
|
// in the input set. That walk may result in consuming more elements from the
|
|
|
|
// input iterator. In the example above, when we consider "A", we will initially
|
|
|
|
// look ahead to "B" and "c". When we consider edges from the external commit
|
|
|
|
// "c", we will further consume the input iterator to "E".
|
|
|
|
//
|
|
|
|
// Missing edges are those that don't lead back into the input set. If all edges
|
|
|
|
// from an external commit are missing, we consider the edge to that commit to
|
|
|
|
// also be missing. In the example above, that means that "B" will have a
|
|
|
|
// missing edge to "d" rather than to the root.
|
2021-04-30 04:43:12 +00:00
|
|
|
//
|
|
|
|
// The iterator can be configured to skip transitive edges that it would
|
|
|
|
// otherwise return. In this mode (which is the default), the edge from "A" to
|
|
|
|
// "E" in the example above would be excluded because there's also a transitive
|
|
|
|
// path from "A" to "E" via "B". The implementation of that mode
|
|
|
|
// adds a filtering step just before yielding the edges for a commit. The
|
|
|
|
// filtering does a DFS in the simplified graph. That may require even
|
|
|
|
// more look-ahead. Consider this example (uppercase characters are in the input
|
|
|
|
// set):
|
|
|
|
//
|
|
|
|
//   J
//  /|
// | i
// | |\
// | | H
// G | |
// | e f
// |  \|\
// |   D |
//  \ /  c
//   b  /
//   |/
//   A
//   |
//  root
|
|
|
|
//
|
|
|
|
// When walking from "J", we'll find indirect edges to "H", "G", and "D". This
|
|
|
|
// is our unfiltered set of edges, before removing transitive edges. In order to
|
|
|
|
// know that "D" is an ancestor of "H", we need to also walk from "H". We use
|
|
|
|
// the same search for finding edges from "H" as we used from "J". That results
|
|
|
|
// in looking ahead all the way to "A". We could reduce the amount of look-ahead
|
|
|
|
// by stopping at "c" since we're only interested in edges that could lead to
|
|
|
|
// "D", but that would require extra book-keeping to remember for later that the
|
|
|
|
// edges from "f" and "H" are only partially computed.
|
2023-02-15 17:33:44 +00:00
|
|
|
pub struct RevsetGraphIterator<'revset, 'index> {
|
2023-02-15 18:16:29 +00:00
|
|
|
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
|
2021-04-24 05:55:15 +00:00
|
|
|
// Commits in the input set we had to take out of the iterator while walking external
|
|
|
|
// edges. Does not necessarily include the commit we're currently about to emit.
|
2023-02-15 17:33:44 +00:00
|
|
|
look_ahead: BTreeMap<IndexPosition, IndexEntry<'index>>,
|
2021-04-24 05:55:15 +00:00
|
|
|
// The last consumed position. This is always the smallest key in the look_ahead map, but it's
|
|
|
|
// faster to keep a separate field for it.
|
|
|
|
min_position: IndexPosition,
|
|
|
|
// Edges for commits not in the input set.
|
|
|
|
// TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
|
2023-03-19 23:52:33 +00:00
|
|
|
edges: BTreeMap<IndexPosition, HashSet<(IndexPosition, RevsetGraphEdge)>>,
|
2021-04-30 04:43:12 +00:00
|
|
|
skip_transitive_edges: bool,
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
|
2023-03-23 01:05:41 +00:00
|
|
|
pub fn new(
|
|
|
|
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
|
|
|
|
) -> RevsetGraphIterator<'revset, 'index> {
|
2021-04-24 05:55:15 +00:00
|
|
|
RevsetGraphIterator {
|
2023-03-23 01:05:41 +00:00
|
|
|
input_set_iter,
|
2021-04-24 05:55:15 +00:00
|
|
|
look_ahead: Default::default(),
|
|
|
|
min_position: IndexPosition::MAX,
|
2021-04-30 04:43:12 +00:00
|
|
|
edges: Default::default(),
|
|
|
|
skip_transitive_edges: true,
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-30 04:43:12 +00:00
|
|
|
pub fn set_skip_transitive_edges(mut self, skip_transitive_edges: bool) -> Self {
|
|
|
|
self.skip_transitive_edges = skip_transitive_edges;
|
|
|
|
self
|
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
fn next_index_entry(&mut self) -> Option<IndexEntry<'index>> {
|
2022-02-21 06:14:13 +00:00
|
|
|
if let Some(index_entry) = self.look_ahead.pop_last_value() {
|
2021-04-24 05:55:15 +00:00
|
|
|
return Some(index_entry);
|
|
|
|
}
|
|
|
|
self.input_set_iter.next()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn edges_from_internal_commit(
|
|
|
|
&mut self,
|
2023-02-15 17:33:44 +00:00
|
|
|
index_entry: &IndexEntry<'index>,
|
2023-03-19 23:52:33 +00:00
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-30 04:43:12 +00:00
|
|
|
if let Some(edges) = self.edges.get(&index_entry.position()) {
|
|
|
|
return edges.clone();
|
|
|
|
}
|
2021-04-24 05:55:15 +00:00
|
|
|
let mut edges = HashSet::new();
|
|
|
|
for parent in index_entry.parents() {
|
|
|
|
let parent_position = parent.position();
|
2023-03-19 23:52:33 +00:00
|
|
|
let parent_commit_id = parent.commit_id();
|
2021-04-24 05:55:15 +00:00
|
|
|
self.consume_to(parent_position);
|
|
|
|
if self.look_ahead.contains_key(&parent_position) {
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((parent_position, RevsetGraphEdge::direct(parent_commit_id)));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
let parent_edges = self.edges_from_external_commit(parent);
|
|
|
|
if parent_edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
|
2021-04-24 05:55:15 +00:00
|
|
|
{
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((parent_position, RevsetGraphEdge::missing(parent_commit_id)));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
edges.extend(parent_edges);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.insert(index_entry.position(), edges.clone());
|
2021-04-24 05:55:15 +00:00
|
|
|
edges
|
|
|
|
}
|
|
|
|
|
|
|
|
fn edges_from_external_commit(
|
|
|
|
&mut self,
|
2023-02-15 17:33:44 +00:00
|
|
|
index_entry: IndexEntry<'index>,
|
2023-03-19 23:52:33 +00:00
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-24 05:55:15 +00:00
|
|
|
let position = index_entry.position();
|
|
|
|
let mut stack = vec![index_entry];
|
|
|
|
while let Some(entry) = stack.last() {
|
|
|
|
let position = entry.position();
|
|
|
|
let mut edges = HashSet::new();
|
|
|
|
let mut parents_complete = true;
|
|
|
|
for parent in entry.parents() {
|
|
|
|
let parent_position = parent.position();
|
|
|
|
self.consume_to(parent_position);
|
|
|
|
if self.look_ahead.contains_key(&parent_position) {
|
|
|
|
// We have found a path back into the input set
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::indirect(parent.commit_id()),
|
|
|
|
));
|
2021-04-30 04:43:12 +00:00
|
|
|
} else if let Some(parent_edges) = self.edges.get(&parent_position) {
|
2021-04-24 05:55:15 +00:00
|
|
|
if parent_edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
|
2021-04-24 05:55:15 +00:00
|
|
|
{
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::missing(parent.commit_id()),
|
|
|
|
));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
edges.extend(parent_edges.iter().cloned());
|
|
|
|
}
|
|
|
|
} else if parent_position < self.min_position {
|
|
|
|
// The parent is not in the input set
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.insert((
|
|
|
|
parent_position,
|
|
|
|
RevsetGraphEdge::missing(parent.commit_id()),
|
|
|
|
));
|
2021-04-24 05:55:15 +00:00
|
|
|
} else {
|
|
|
|
// The parent is not in the input set but it's somewhere in the range
|
|
|
|
// where we have commits in the input set, so continue searching.
|
|
|
|
stack.push(parent);
|
|
|
|
parents_complete = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if parents_complete {
|
|
|
|
stack.pop().unwrap();
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.insert(position, edges);
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|
2021-04-30 04:43:12 +00:00
|
|
|
self.edges.get(&position).unwrap().clone()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn remove_transitive_edges(
|
|
|
|
&mut self,
|
2023-03-19 23:52:33 +00:00
|
|
|
edges: HashSet<(IndexPosition, RevsetGraphEdge)>,
|
|
|
|
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
|
2021-04-30 04:43:12 +00:00
|
|
|
if !edges
|
|
|
|
.iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.any(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Indirect)
|
2021-04-30 04:43:12 +00:00
|
|
|
{
|
|
|
|
return edges;
|
|
|
|
}
|
|
|
|
let mut min_generation = u32::MAX;
|
|
|
|
let mut initial_targets = HashSet::new();
|
|
|
|
let mut work = vec![];
|
|
|
|
// To start with, add the edges one step after the input edges.
|
2023-03-19 23:52:33 +00:00
|
|
|
for (target, edge) in &edges {
|
|
|
|
initial_targets.insert(target);
|
2021-04-30 04:43:12 +00:00
|
|
|
if edge.edge_type != RevsetGraphEdgeType::Missing {
|
2023-03-19 23:52:33 +00:00
|
|
|
let entry = self.look_ahead.get(target).unwrap().clone();
|
2021-04-30 04:43:12 +00:00
|
|
|
min_generation = min(min_generation, entry.generation_number());
|
|
|
|
work.extend(self.edges_from_internal_commit(&entry));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Find commits reachable transitively and add them to the `unwanted` set.
|
|
|
|
let mut unwanted = HashSet::new();
|
2023-03-19 23:52:33 +00:00
|
|
|
while let Some((target, edge)) = work.pop() {
|
|
|
|
if edge.edge_type == RevsetGraphEdgeType::Missing || target < self.min_position {
|
2021-04-30 04:43:12 +00:00
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
if !unwanted.insert(target) {
|
2021-04-30 04:43:12 +00:00
|
|
|
// Already visited
|
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
if initial_targets.contains(&target) {
|
2021-04-30 04:43:12 +00:00
|
|
|
// Already visited
|
|
|
|
continue;
|
|
|
|
}
|
2023-03-19 23:52:33 +00:00
|
|
|
let entry = self.look_ahead.get(&target).unwrap().clone();
|
2021-04-30 04:43:12 +00:00
|
|
|
if entry.generation_number() < min_generation {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
work.extend(self.edges_from_internal_commit(&entry));
|
|
|
|
}
|
|
|
|
|
|
|
|
edges
|
|
|
|
.into_iter()
|
2023-03-19 23:52:33 +00:00
|
|
|
.filter(|(target, _)| !unwanted.contains(target))
|
2021-04-30 04:43:12 +00:00
|
|
|
.collect()
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn consume_to(&mut self, pos: IndexPosition) {
|
|
|
|
while pos < self.min_position {
|
|
|
|
if let Some(next_entry) = self.input_set_iter.next() {
|
|
|
|
let next_position = next_entry.position();
|
|
|
|
self.look_ahead.insert(next_position, next_entry);
|
|
|
|
self.min_position = next_position;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-02-15 17:33:44 +00:00
|
|
|
impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> {
|
2023-03-20 00:44:50 +00:00
|
|
|
type Item = (CommitId, Vec<RevsetGraphEdge>);
|
2021-04-24 05:55:15 +00:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2022-04-23 01:01:49 +00:00
|
|
|
let index_entry = self.next_index_entry()?;
|
|
|
|
let mut edges = self.edges_from_internal_commit(&index_entry);
|
|
|
|
if self.skip_transitive_edges {
|
|
|
|
edges = self.remove_transitive_edges(edges);
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
2022-04-23 01:01:49 +00:00
|
|
|
let mut edges: Vec<_> = edges.into_iter().collect();
|
2023-03-19 23:52:33 +00:00
|
|
|
edges.sort_by(|(target_pos1, _), (target_pos2, _)| target_pos2.cmp(target_pos1));
|
|
|
|
let edges = edges.into_iter().map(|(_, edge)| edge).collect();
|
2023-03-20 00:44:50 +00:00
|
|
|
Some((index_entry.commit_id(), edges))
|
2021-04-24 05:55:15 +00:00
|
|
|
}
|
|
|
|
}
|