revsets: add iterator adapter for rendering simplified graph of set

When rendering a non-contiguous subset of the commits, we want to
still show the connections between the commits in the graph, even
though they're not directly connected. This commit introduces an
adaptor for the revset iterators that also yield the edges to show in
such a simplified graph.

This has no measurable impact on `jj log -r ,,v2.0.0` in the git.git
repo.


The output of `jj log -r 'v1.0.0 | v2.0.0'` now looks like this:

```
o   e156455ea491 e156455ea491 gitster@pobox.com 2014-05-28 11:04:19.000 -07:00 refs/tags/v2.0.0
:\  Git 2.0
: ~
o c2f3bf071ee9 c2f3bf071ee9 junkio@cox.net 2005-12-21 00:01:00.000 -08:00 refs/tags/v1.0.0
~ GIT 1.0.0
```

Before this commit, it looked like this:

```
o e156455ea491 e156455ea491 gitster@pobox.com 2014-05-28 11:04:19.000 -07:00 refs/tags/v2.0.0
| Git 2.0
| o   c2f3bf071ee9 c2f3bf071ee9 junkio@cox.net 2005-12-21 00:01:00.000 -08:00 refs/tags/v1.0.0
| |\  GIT 1.0.0
```

The output of `jj log -r 'git_refs()'` in the git.git repo is still
completely useless (it's >350k lines and >500MB of data). I think
that's because we don't filter out edges to ancestors that we have
transitive edges to. Mercurial also doesn't filter out such edges, but
Git (with `--simplify-by-decoration`) seems to filter them out. I'll
change it soon so we filter them out.
This commit is contained in:
Martin von Zweigbergk 2021-04-23 22:55:15 -07:00
parent b953d7d801
commit 33da97f0bf
6 changed files with 519 additions and 7 deletions

View file

@ -35,6 +35,10 @@ use crate::store::{ChangeId, CommitId};
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub struct IndexPosition(u32);
impl IndexPosition {
pub const MAX: Self = IndexPosition(u32::MAX);
}
#[derive(Clone)]
pub enum IndexRef<'a> {
Readonly(&'a ReadonlyIndex),
@ -987,7 +991,7 @@ pub struct IndexStats {
}
#[derive(Clone, Eq, PartialEq)]
struct IndexEntryByPosition<'a>(IndexEntry<'a>);
pub struct IndexEntryByPosition<'a>(IndexEntry<'a>);
impl Ord for IndexEntryByPosition<'_> {
fn cmp(&self, other: &Self) -> Ordering {
@ -1333,6 +1337,7 @@ impl Debug for IndexEntry<'_> {
f.debug_struct("IndexEntry")
.field("pos", &self.pos)
.field("local_pos", &self.local_pos)
.field("commit_id", &self.commit_id().hex())
.finish()
}
}

View file

@ -45,6 +45,7 @@ pub mod protos;
pub mod repo;
pub mod repo_path;
pub mod revset;
pub mod revset_graph_iterator;
pub mod rewrite;
pub mod settings;
pub mod simple_op_store;

View file

@ -24,6 +24,7 @@ use thiserror::Error;
use crate::commit::Commit;
use crate::index::{HexPrefix, IndexEntry, IndexPosition, PrefixResolution, RevWalk};
use crate::repo::RepoRef;
use crate::revset_graph_iterator::RevsetGraphIterator;
use crate::store::{CommitId, StoreError};
#[derive(Debug, Error, PartialEq, Eq)]
@ -493,6 +494,10 @@ impl<'revset, 'repo> RevsetIterator<'revset, 'repo> {
fn new(inner: Box<dyn Iterator<Item = IndexEntry<'repo>> + 'revset>) -> Self {
Self { inner }
}
pub fn graph(self) -> RevsetGraphIterator<'revset, 'repo> {
RevsetGraphIterator::new(self)
}
}
impl<'repo> Iterator for RevsetIterator<'_, 'repo> {
@ -896,6 +901,19 @@ fn non_obsolete_heads<'revset, 'repo: 'revset>(
Box::new(EagerRevset { index_entries })
}
pub fn revset_for_commits<'revset, 'repo: 'revset>(
repo: RepoRef<'repo>,
commits: &[&Commit],
) -> Box<dyn Revset<'repo> + 'revset> {
let index = repo.index();
let mut index_entries: Vec<_> = commits
.iter()
.map(|commit| index.entry_by_id(commit.id()).unwrap())
.collect();
index_entries.sort_by_key(|b| Reverse(b.position()));
Box::new(EagerRevset { index_entries })
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -0,0 +1,208 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use std::collections::{BTreeMap, HashSet};
use crate::index::{IndexEntry, IndexPosition};
use crate::revset::RevsetIterator;
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct RevsetGraphEdge {
pub target: IndexPosition,
pub edge_type: RevsetGraphEdgeType,
}
impl RevsetGraphEdge {
pub fn missing(target: IndexPosition) -> Self {
Self {
target,
edge_type: RevsetGraphEdgeType::Missing,
}
}
pub fn direct(target: IndexPosition) -> Self {
Self {
target,
edge_type: RevsetGraphEdgeType::Direct,
}
}
pub fn indirect(target: IndexPosition) -> Self {
Self {
target,
edge_type: RevsetGraphEdgeType::Indirect,
}
}
}
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum RevsetGraphEdgeType {
Missing,
Direct,
Indirect,
}
// Given an iterator over some set of revisions, yields the same revisions with
// associated edge types.
//
// If a revision's parent is in the input set, then the edge will be "direct".
// Otherwise, there will be one "indirect" edge for each closest ancestor in the
// set, and one "missing" edge for each edge leading outside the set.
//
// Example (uppercase characters are in the input set):
//
// A A
// |\ |\
// B c B :
// |\| => |\:
// d E ~ E
// |/ ~
// root
//
// The implementation works by walking the input iterator in one commit at a
// time. It then considers all parents of the commit. It looks ahead in the
// input iterator far enough that all the parents will have been consumed if
// they are in the input (and puts them away so we can emit them later). If a
// parent of the current commit is not in the input set (i.e. it was not
// in the look-ahead), we walk these external commits until we end up back back
// in the input set. That walk may result in consuming more elements from the
// input iterator. In the example above, when we consider "A", we will initially
// look ahead to "B" and "c". When we consider edges from the external commit
// "c", we will further consume the input iterator to "E".
//
// Missing edges are those that don't lead back into the input set. If all edges
// from an external commit are missing, we consider the edge to that edge to
// also be missing. In the example above, that means that "B" will have a
// missing edge to "d" rather than to the root.
pub struct RevsetGraphIterator<'revset, 'repo> {
input_set_iter: RevsetIterator<'revset, 'repo>,
// Commits in the input set we had to take out of the iterator while walking external
// edges. Does not necessarily include the commit we're currently about to emit.
look_ahead: BTreeMap<IndexPosition, IndexEntry<'repo>>,
// The last consumed position. This is always the smallest key in the look_ahead map, but it's
// faster to keep a separate field for it.
min_position: IndexPosition,
// Edges for commits not in the input set.
// TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
external_commits: BTreeMap<IndexPosition, HashSet<RevsetGraphEdge>>,
}
impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> {
pub fn new(iter: RevsetIterator<'revset, 'repo>) -> RevsetGraphIterator<'revset, 'repo> {
RevsetGraphIterator {
input_set_iter: iter,
look_ahead: Default::default(),
min_position: IndexPosition::MAX,
external_commits: Default::default(),
}
}
fn next_index_entry(&mut self) -> Option<IndexEntry<'repo>> {
if let Some((_, index_entry)) = self.look_ahead.pop_last() {
return Some(index_entry);
}
self.input_set_iter.next()
}
fn edges_from_internal_commit(
&mut self,
index_entry: &IndexEntry<'repo>,
) -> HashSet<RevsetGraphEdge> {
let mut edges = HashSet::new();
for parent in index_entry.parents() {
let parent_position = parent.position();
self.consume_to(parent_position);
if self.look_ahead.contains_key(&parent_position) {
edges.insert(RevsetGraphEdge::direct(parent_position));
} else {
let parent_edges = self.edges_from_external_commit(parent);
if parent_edges
.iter()
.all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
{
edges.insert(RevsetGraphEdge::missing(parent_position));
} else {
edges.extend(parent_edges);
}
}
}
edges
}
fn edges_from_external_commit(
&mut self,
index_entry: IndexEntry<'repo>,
) -> HashSet<RevsetGraphEdge> {
let position = index_entry.position();
let mut stack = vec![index_entry];
while let Some(entry) = stack.last() {
let position = entry.position();
let mut edges = HashSet::new();
let mut parents_complete = true;
for parent in entry.parents() {
let parent_position = parent.position();
self.consume_to(parent_position);
if self.look_ahead.contains_key(&parent_position) {
// We have found a path back into the input set
edges.insert(RevsetGraphEdge::indirect(parent_position));
} else if let Some(parent_edges) = self.external_commits.get(&parent_position) {
if parent_edges
.iter()
.all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
{
edges.insert(RevsetGraphEdge::missing(parent_position));
} else {
edges.extend(parent_edges.iter().cloned());
}
} else if parent_position < self.min_position {
// The parent is not in the input set
edges.insert(RevsetGraphEdge::missing(parent_position));
} else {
// The parent is not in the input set but it's somewhere in the range
// where we have commits in the input set, so continue searching.
stack.push(parent);
parents_complete = false;
}
}
if parents_complete {
stack.pop().unwrap();
self.external_commits.insert(position, edges);
}
}
self.external_commits.get(&position).unwrap().clone()
}
fn consume_to(&mut self, pos: IndexPosition) {
while pos < self.min_position {
if let Some(next_entry) = self.input_set_iter.next() {
let next_position = next_entry.position();
self.look_ahead.insert(next_position, next_entry);
self.min_position = next_position;
} else {
break;
}
}
}
}
impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> {
type Item = (IndexEntry<'repo>, HashSet<RevsetGraphEdge>);
fn next(&mut self) -> Option<Self::Item> {
if let Some(index_entry) = self.next_index_entry() {
let edges = self.edges_from_internal_commit(&index_entry);
Some((index_entry, edges))
} else {
None
}
}
}

View file

@ -0,0 +1,261 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use jujube_lib::revset::revset_for_commits;
use jujube_lib::revset_graph_iterator::RevsetGraphEdge;
use jujube_lib::testutils;
use jujube_lib::testutils::CommitGraphBuilder;
use maplit::hashset;
#[test]
fn test_graph_iterator_linearized() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that a fork and a merge becomes a single edge:
// D
// |\ D
// b c => :
// |/ A
// A ~
// |
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.commit_with_parents(&[&commit_a]);
let commit_c = graph_builder.commit_with_parents(&[&commit_a]);
let commit_d = graph_builder.commit_with_parents(&[&commit_b, &commit_c]);
let pos_root = mut_repo
.index()
.commit_id_to_pos(repo.store().root_commit_id())
.unwrap();
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_d]);
let commits: Vec<_> = revset.iter().graph().collect();
drop(revset);
assert_eq!(commits.len(), 2);
assert_eq!(commits[0].0.commit_id(), *commit_d.id());
assert_eq!(commits[1].0.commit_id(), *commit_a.id());
assert_eq!(commits[0].1, hashset![RevsetGraphEdge::indirect(pos_a)]);
assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]);
tx.discard();
}
#[test]
fn test_graph_iterator_virtual_octopus() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that merges outside the set can result in more parent edges than there
// was in the input: F
// |\
// d e F
// |\|\ => /|\
// A B C A B C
// \|/ ~ ~ ~
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.initial_commit();
let commit_c = graph_builder.initial_commit();
let commit_d = graph_builder.commit_with_parents(&[&commit_a, &commit_b]);
let commit_e = graph_builder.commit_with_parents(&[&commit_b, &commit_c]);
let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]);
let pos_root = mut_repo
.index()
.commit_id_to_pos(repo.store().root_commit_id())
.unwrap();
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap();
let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap();
let revset = revset_for_commits(
mut_repo.as_repo_ref(),
&[&commit_a, &commit_b, &commit_c, &commit_f],
);
let commits: Vec<_> = revset.iter().graph().collect();
drop(revset);
assert_eq!(commits.len(), 4);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
assert_eq!(commits[1].0.commit_id(), *commit_c.id());
assert_eq!(commits[2].0.commit_id(), *commit_b.id());
assert_eq!(commits[3].0.commit_id(), *commit_a.id());
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_a),
RevsetGraphEdge::indirect(pos_b),
RevsetGraphEdge::indirect(pos_c),
]
);
assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]);
assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_root)]);
assert_eq!(commits[3].1, hashset![RevsetGraphEdge::missing(pos_root)]);
tx.discard();
}
#[test]
fn test_graph_iterator_simple_fork() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that the branch with "C" gets emitted correctly:
// E
// |
// d
// | C E C
// |/ |/
// b => A
// | ~
// A
// |
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.commit_with_parents(&[&commit_a]);
let commit_c = graph_builder.commit_with_parents(&[&commit_b]);
let commit_d = graph_builder.commit_with_parents(&[&commit_b]);
let commit_e = graph_builder.commit_with_parents(&[&commit_d]);
let pos_root = mut_repo
.index()
.commit_id_to_pos(repo.store().root_commit_id())
.unwrap();
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_c, &commit_e]);
let commits: Vec<_> = revset.iter().graph().collect();
drop(revset);
assert_eq!(commits.len(), 3);
assert_eq!(commits[0].0.commit_id(), *commit_e.id());
assert_eq!(commits[1].0.commit_id(), *commit_c.id());
assert_eq!(commits[2].0.commit_id(), *commit_a.id());
assert_eq!(commits[0].1, hashset![RevsetGraphEdge::indirect(pos_a)]);
assert_eq!(commits[1].1, hashset![RevsetGraphEdge::indirect(pos_a)]);
assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_root)]);
tx.discard();
}
#[test]
fn test_graph_iterator_multiple_missing() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that we get missing edges to "a" and "c" and not just one missing edge
// to the root. F
// / \ F
// d e => /|\
// |\ /| ~ B ~
// a B c ~
// \|/
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.initial_commit();
let commit_c = graph_builder.initial_commit();
let commit_d = graph_builder.commit_with_parents(&[&commit_a, &commit_b]);
let commit_e = graph_builder.commit_with_parents(&[&commit_b, &commit_c]);
let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]);
let pos_root = mut_repo
.index()
.commit_id_to_pos(repo.store().root_commit_id())
.unwrap();
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap();
let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_b, &commit_f]);
let commits: Vec<_> = revset.iter().graph().collect();
drop(revset);
assert_eq!(commits.len(), 2);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
assert_eq!(commits[1].0.commit_id(), *commit_b.id());
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::missing(pos_a),
RevsetGraphEdge::indirect(pos_b),
RevsetGraphEdge::missing(pos_c),
]
);
assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]);
tx.discard();
}
#[test]
fn test_graph_iterator_edge_to_ancestor() {
let settings = testutils::user_settings();
let (_temp_dir, repo) = testutils::init_repo(&settings, true);
let mut tx = repo.start_transaction("test");
let mut_repo = tx.mut_repo();
// Tests that we get both an edge from F to D and to D's ancestor C:
// F F
// |\ |\
// D e D :
// |\| => |\:
// b C ~ C
// | ~
// a
// |
// root
let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo);
let commit_a = graph_builder.initial_commit();
let commit_b = graph_builder.initial_commit();
let commit_c = graph_builder.commit_with_parents(&[&commit_a]);
let commit_d = graph_builder.commit_with_parents(&[&commit_b, &commit_c]);
let commit_e = graph_builder.commit_with_parents(&[&commit_c]);
let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]);
let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap();
let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap();
let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap();
let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap();
let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_c, &commit_d, &commit_f]);
let commits: Vec<_> = revset.iter().graph().collect();
drop(revset);
assert_eq!(commits.len(), 3);
assert_eq!(commits[0].0.commit_id(), *commit_f.id());
assert_eq!(commits[1].0.commit_id(), *commit_d.id());
assert_eq!(commits[2].0.commit_id(), *commit_c.id());
assert_eq!(
commits[0].1,
hashset![
RevsetGraphEdge::indirect(pos_c),
RevsetGraphEdge::direct(pos_d)
]
);
assert_eq!(
commits[1].1,
hashset![
RevsetGraphEdge::missing(pos_b),
RevsetGraphEdge::direct(pos_c)
]
);
assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_a)]);
tx.discard();
}

View file

@ -40,6 +40,7 @@ use jujube_lib::operation::Operation;
use jujube_lib::repo::{MutableRepo, ReadonlyRepo, RepoLoadError, RepoLoader};
use jujube_lib::repo_path::RepoPath;
use jujube_lib::revset::{RevsetError, RevsetParseError};
use jujube_lib::revset_graph_iterator::RevsetGraphEdgeType;
use jujube_lib::rewrite::{back_out_commit, merge_commit_trees, rebase_commit};
use jujube_lib::settings::UserSettings;
use jujube_lib::store::{StoreError, Timestamp, TreeValue};
@ -1061,11 +1062,29 @@ fn cmd_log(
let revset = revset::evaluate_expression(repo.as_repo_ref(), &revset_expression)?;
if use_graph {
let mut graph = AsciiGraphDrawer::new(&mut styler);
for index_entry in revset.iter() {
let mut edges = vec![];
for parent_position in index_entry.parent_positions() {
// TODO: Use the right kind of edge here.
edges.push(Edge::direct(parent_position));
for (index_entry, edges) in revset.iter().graph() {
let mut graphlog_edges = vec![];
// TODO: Should we update RevsetGraphIterator to yield this flag instead of all
// the missing edges since we don't care about where they point here
// anyway?
let mut has_missing = false;
for edge in edges {
match edge.edge_type {
RevsetGraphEdgeType::Missing => {
has_missing = true;
}
RevsetGraphEdgeType::Direct => graphlog_edges.push(Edge::Present {
direct: true,
target: edge.target,
}),
RevsetGraphEdgeType::Indirect => graphlog_edges.push(Edge::Present {
direct: false,
target: edge.target,
}),
}
}
if has_missing {
graphlog_edges.push(Edge::Missing);
}
let mut buffer = vec![];
// TODO: only use color if requested
@ -1078,7 +1097,7 @@ fn cmd_log(
if !buffer.ends_with(b"\n") {
buffer.push(b'\n');
}
graph.add_node(&index_entry.position(), &edges, b"o", &buffer)?;
graph.add_node(&index_entry.position(), &graphlog_edges, b"o", &buffer)?;
}
} else {
for index_entry in revset.iter() {