diff --git a/lib/src/index.rs b/lib/src/index.rs index 7d095b120..05167f791 100644 --- a/lib/src/index.rs +++ b/lib/src/index.rs @@ -35,6 +35,10 @@ use crate::store::{ChangeId, CommitId}; #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)] pub struct IndexPosition(u32); +impl IndexPosition { + pub const MAX: Self = IndexPosition(u32::MAX); +} + #[derive(Clone)] pub enum IndexRef<'a> { Readonly(&'a ReadonlyIndex), @@ -987,7 +991,7 @@ pub struct IndexStats { } #[derive(Clone, Eq, PartialEq)] -struct IndexEntryByPosition<'a>(IndexEntry<'a>); +pub struct IndexEntryByPosition<'a>(IndexEntry<'a>); impl Ord for IndexEntryByPosition<'_> { fn cmp(&self, other: &Self) -> Ordering { @@ -1333,6 +1337,7 @@ impl Debug for IndexEntry<'_> { f.debug_struct("IndexEntry") .field("pos", &self.pos) .field("local_pos", &self.local_pos) + .field("commit_id", &self.commit_id().hex()) .finish() } } diff --git a/lib/src/lib.rs b/lib/src/lib.rs index b9af04e54..40ae2682f 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -45,6 +45,7 @@ pub mod protos; pub mod repo; pub mod repo_path; pub mod revset; +pub mod revset_graph_iterator; pub mod rewrite; pub mod settings; pub mod simple_op_store; diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 671270429..501c2382b 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -24,6 +24,7 @@ use thiserror::Error; use crate::commit::Commit; use crate::index::{HexPrefix, IndexEntry, IndexPosition, PrefixResolution, RevWalk}; use crate::repo::RepoRef; +use crate::revset_graph_iterator::RevsetGraphIterator; use crate::store::{CommitId, StoreError}; #[derive(Debug, Error, PartialEq, Eq)] @@ -493,6 +494,10 @@ impl<'revset, 'repo> RevsetIterator<'revset, 'repo> { fn new(inner: Box> + 'revset>) -> Self { Self { inner } } + + pub fn graph(self) -> RevsetGraphIterator<'revset, 'repo> { + RevsetGraphIterator::new(self) + } } impl<'repo> Iterator for RevsetIterator<'_, 'repo> { @@ -896,6 +901,19 @@ fn non_obsolete_heads<'revset, 'repo: 'revset>( Box::new(EagerRevset { index_entries }) } +pub fn revset_for_commits<'revset, 'repo: 'revset>( + repo: RepoRef<'repo>, + commits: &[&Commit], +) -> Box + 'revset> { + let index = repo.index(); + let mut index_entries: Vec<_> = commits + .iter() + .map(|commit| index.entry_by_id(commit.id()).unwrap()) + .collect(); + index_entries.sort_by_key(|b| Reverse(b.position())); + Box::new(EagerRevset { index_entries }) +} + #[cfg(test)] mod tests { use super::*; diff --git a/lib/src/revset_graph_iterator.rs b/lib/src/revset_graph_iterator.rs new file mode 100644 index 000000000..b14e45d5f --- /dev/null +++ b/lib/src/revset_graph_iterator.rs @@ -0,0 +1,208 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::{BTreeMap, HashSet}; + +use crate::index::{IndexEntry, IndexPosition}; +use crate::revset::RevsetIterator; + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub struct RevsetGraphEdge { + pub target: IndexPosition, + pub edge_type: RevsetGraphEdgeType, +} + +impl RevsetGraphEdge { + pub fn missing(target: IndexPosition) -> Self { + Self { + target, + edge_type: RevsetGraphEdgeType::Missing, + } + } + pub fn direct(target: IndexPosition) -> Self { + Self { + target, + edge_type: RevsetGraphEdgeType::Direct, + } + } + pub fn indirect(target: IndexPosition) -> Self { + Self { + target, + edge_type: RevsetGraphEdgeType::Indirect, + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub enum RevsetGraphEdgeType { + Missing, + Direct, + Indirect, +} + +// Given an iterator over some set of revisions, yields the same revisions with +// associated edge types. +// +// If a revision's parent is in the input set, then the edge will be "direct". +// Otherwise, there will be one "indirect" edge for each closest ancestor in the +// set, and one "missing" edge for each edge leading outside the set. +// +// Example (uppercase characters are in the input set): +// +// A A +// |\ |\ +// B c B : +// |\| => |\: +// d E ~ E +// |/ ~ +// root +// +// The implementation works by walking the input iterator in one commit at a +// time. It then considers all parents of the commit. It looks ahead in the +// input iterator far enough that all the parents will have been consumed if +// they are in the input (and puts them away so we can emit them later). If a +// parent of the current commit is not in the input set (i.e. it was not +// in the look-ahead), we walk these external commits until we end up back back +// in the input set. That walk may result in consuming more elements from the +// input iterator. In the example above, when we consider "A", we will initially +// look ahead to "B" and "c". When we consider edges from the external commit +// "c", we will further consume the input iterator to "E". +// +// Missing edges are those that don't lead back into the input set. If all edges +// from an external commit are missing, we consider the edge to that edge to +// also be missing. In the example above, that means that "B" will have a +// missing edge to "d" rather than to the root. +pub struct RevsetGraphIterator<'revset, 'repo> { + input_set_iter: RevsetIterator<'revset, 'repo>, + // Commits in the input set we had to take out of the iterator while walking external + // edges. Does not necessarily include the commit we're currently about to emit. + look_ahead: BTreeMap>, + // The last consumed position. This is always the smallest key in the look_ahead map, but it's + // faster to keep a separate field for it. + min_position: IndexPosition, + // Edges for commits not in the input set. + // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? + external_commits: BTreeMap>, +} + +impl<'revset, 'repo> RevsetGraphIterator<'revset, 'repo> { + pub fn new(iter: RevsetIterator<'revset, 'repo>) -> RevsetGraphIterator<'revset, 'repo> { + RevsetGraphIterator { + input_set_iter: iter, + look_ahead: Default::default(), + min_position: IndexPosition::MAX, + external_commits: Default::default(), + } + } + + fn next_index_entry(&mut self) -> Option> { + if let Some((_, index_entry)) = self.look_ahead.pop_last() { + return Some(index_entry); + } + self.input_set_iter.next() + } + + fn edges_from_internal_commit( + &mut self, + index_entry: &IndexEntry<'repo>, + ) -> HashSet { + let mut edges = HashSet::new(); + for parent in index_entry.parents() { + let parent_position = parent.position(); + self.consume_to(parent_position); + if self.look_ahead.contains_key(&parent_position) { + edges.insert(RevsetGraphEdge::direct(parent_position)); + } else { + let parent_edges = self.edges_from_external_commit(parent); + if parent_edges + .iter() + .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) + { + edges.insert(RevsetGraphEdge::missing(parent_position)); + } else { + edges.extend(parent_edges); + } + } + } + edges + } + + fn edges_from_external_commit( + &mut self, + index_entry: IndexEntry<'repo>, + ) -> HashSet { + let position = index_entry.position(); + let mut stack = vec![index_entry]; + while let Some(entry) = stack.last() { + let position = entry.position(); + let mut edges = HashSet::new(); + let mut parents_complete = true; + for parent in entry.parents() { + let parent_position = parent.position(); + self.consume_to(parent_position); + if self.look_ahead.contains_key(&parent_position) { + // We have found a path back into the input set + edges.insert(RevsetGraphEdge::indirect(parent_position)); + } else if let Some(parent_edges) = self.external_commits.get(&parent_position) { + if parent_edges + .iter() + .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) + { + edges.insert(RevsetGraphEdge::missing(parent_position)); + } else { + edges.extend(parent_edges.iter().cloned()); + } + } else if parent_position < self.min_position { + // The parent is not in the input set + edges.insert(RevsetGraphEdge::missing(parent_position)); + } else { + // The parent is not in the input set but it's somewhere in the range + // where we have commits in the input set, so continue searching. + stack.push(parent); + parents_complete = false; + } + } + if parents_complete { + stack.pop().unwrap(); + self.external_commits.insert(position, edges); + } + } + self.external_commits.get(&position).unwrap().clone() + } + + fn consume_to(&mut self, pos: IndexPosition) { + while pos < self.min_position { + if let Some(next_entry) = self.input_set_iter.next() { + let next_position = next_entry.position(); + self.look_ahead.insert(next_position, next_entry); + self.min_position = next_position; + } else { + break; + } + } + } +} + +impl<'revset, 'repo> Iterator for RevsetGraphIterator<'revset, 'repo> { + type Item = (IndexEntry<'repo>, HashSet); + + fn next(&mut self) -> Option { + if let Some(index_entry) = self.next_index_entry() { + let edges = self.edges_from_internal_commit(&index_entry); + Some((index_entry, edges)) + } else { + None + } + } +} diff --git a/lib/tests/test_revset_graph_iterator.rs b/lib/tests/test_revset_graph_iterator.rs new file mode 100644 index 000000000..fa0f05753 --- /dev/null +++ b/lib/tests/test_revset_graph_iterator.rs @@ -0,0 +1,261 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use jujube_lib::revset::revset_for_commits; +use jujube_lib::revset_graph_iterator::RevsetGraphEdge; +use jujube_lib::testutils; +use jujube_lib::testutils::CommitGraphBuilder; +use maplit::hashset; + +#[test] +fn test_graph_iterator_linearized() { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests that a fork and a merge becomes a single edge: + // D + // |\ D + // b c => : + // |/ A + // A ~ + // | + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.commit_with_parents(&[&commit_a]); + let commit_c = graph_builder.commit_with_parents(&[&commit_a]); + let commit_d = graph_builder.commit_with_parents(&[&commit_b, &commit_c]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + + let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_d]); + let commits: Vec<_> = revset.iter().graph().collect(); + drop(revset); + assert_eq!(commits.len(), 2); + assert_eq!(commits[0].0.commit_id(), *commit_d.id()); + assert_eq!(commits[1].0.commit_id(), *commit_a.id()); + assert_eq!(commits[0].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +} + +#[test] +fn test_graph_iterator_virtual_octopus() { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests that merges outside the set can result in more parent edges than there + // was in the input: F + // |\ + // d e F + // |\|\ => /|\ + // A B C A B C + // \|/ ~ ~ ~ + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.initial_commit(); + let commit_c = graph_builder.initial_commit(); + let commit_d = graph_builder.commit_with_parents(&[&commit_a, &commit_b]); + let commit_e = graph_builder.commit_with_parents(&[&commit_b, &commit_c]); + let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap(); + let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap(); + + let revset = revset_for_commits( + mut_repo.as_repo_ref(), + &[&commit_a, &commit_b, &commit_c, &commit_f], + ); + let commits: Vec<_> = revset.iter().graph().collect(); + drop(revset); + assert_eq!(commits.len(), 4); + assert_eq!(commits[0].0.commit_id(), *commit_f.id()); + assert_eq!(commits[1].0.commit_id(), *commit_c.id()); + assert_eq!(commits[2].0.commit_id(), *commit_b.id()); + assert_eq!(commits[3].0.commit_id(), *commit_a.id()); + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_a), + RevsetGraphEdge::indirect(pos_b), + RevsetGraphEdge::indirect(pos_c), + ] + ); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]); + assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_root)]); + assert_eq!(commits[3].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +} + +#[test] +fn test_graph_iterator_simple_fork() { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests that the branch with "C" gets emitted correctly: + // E + // | + // d + // | C E C + // |/ |/ + // b => A + // | ~ + // A + // | + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.commit_with_parents(&[&commit_a]); + let commit_c = graph_builder.commit_with_parents(&[&commit_b]); + let commit_d = graph_builder.commit_with_parents(&[&commit_b]); + let commit_e = graph_builder.commit_with_parents(&[&commit_d]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + + let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_a, &commit_c, &commit_e]); + let commits: Vec<_> = revset.iter().graph().collect(); + drop(revset); + assert_eq!(commits.len(), 3); + assert_eq!(commits[0].0.commit_id(), *commit_e.id()); + assert_eq!(commits[1].0.commit_id(), *commit_c.id()); + assert_eq!(commits[2].0.commit_id(), *commit_a.id()); + assert_eq!(commits[0].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::indirect(pos_a)]); + assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +} + +#[test] +fn test_graph_iterator_multiple_missing() { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests that we get missing edges to "a" and "c" and not just one missing edge + // to the root. F + // / \ F + // d e => /|\ + // |\ /| ~ B ~ + // a B c ~ + // \|/ + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.initial_commit(); + let commit_c = graph_builder.initial_commit(); + let commit_d = graph_builder.commit_with_parents(&[&commit_a, &commit_b]); + let commit_e = graph_builder.commit_with_parents(&[&commit_b, &commit_c]); + let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]); + let pos_root = mut_repo + .index() + .commit_id_to_pos(repo.store().root_commit_id()) + .unwrap(); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap(); + let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap(); + + let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_b, &commit_f]); + let commits: Vec<_> = revset.iter().graph().collect(); + drop(revset); + assert_eq!(commits.len(), 2); + assert_eq!(commits[0].0.commit_id(), *commit_f.id()); + assert_eq!(commits[1].0.commit_id(), *commit_b.id()); + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::missing(pos_a), + RevsetGraphEdge::indirect(pos_b), + RevsetGraphEdge::missing(pos_c), + ] + ); + assert_eq!(commits[1].1, hashset![RevsetGraphEdge::missing(pos_root)]); + + tx.discard(); +} + +#[test] +fn test_graph_iterator_edge_to_ancestor() { + let settings = testutils::user_settings(); + let (_temp_dir, repo) = testutils::init_repo(&settings, true); + let mut tx = repo.start_transaction("test"); + let mut_repo = tx.mut_repo(); + + // Tests that we get both an edge from F to D and to D's ancestor C: + // F F + // |\ |\ + // D e D : + // |\| => |\: + // b C ~ C + // | ~ + // a + // | + // root + let mut graph_builder = CommitGraphBuilder::new(&settings, mut_repo); + let commit_a = graph_builder.initial_commit(); + let commit_b = graph_builder.initial_commit(); + let commit_c = graph_builder.commit_with_parents(&[&commit_a]); + let commit_d = graph_builder.commit_with_parents(&[&commit_b, &commit_c]); + let commit_e = graph_builder.commit_with_parents(&[&commit_c]); + let commit_f = graph_builder.commit_with_parents(&[&commit_d, &commit_e]); + let pos_a = mut_repo.index().commit_id_to_pos(commit_a.id()).unwrap(); + let pos_b = mut_repo.index().commit_id_to_pos(commit_b.id()).unwrap(); + let pos_c = mut_repo.index().commit_id_to_pos(commit_c.id()).unwrap(); + let pos_d = mut_repo.index().commit_id_to_pos(commit_d.id()).unwrap(); + + let revset = revset_for_commits(mut_repo.as_repo_ref(), &[&commit_c, &commit_d, &commit_f]); + let commits: Vec<_> = revset.iter().graph().collect(); + drop(revset); + assert_eq!(commits.len(), 3); + assert_eq!(commits[0].0.commit_id(), *commit_f.id()); + assert_eq!(commits[1].0.commit_id(), *commit_d.id()); + assert_eq!(commits[2].0.commit_id(), *commit_c.id()); + assert_eq!( + commits[0].1, + hashset![ + RevsetGraphEdge::indirect(pos_c), + RevsetGraphEdge::direct(pos_d) + ] + ); + assert_eq!( + commits[1].1, + hashset![ + RevsetGraphEdge::missing(pos_b), + RevsetGraphEdge::direct(pos_c) + ] + ); + assert_eq!(commits[2].1, hashset![RevsetGraphEdge::missing(pos_a)]); + + tx.discard(); +} diff --git a/src/commands.rs b/src/commands.rs index 92a6d7344..f21b78466 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -40,6 +40,7 @@ use jujube_lib::operation::Operation; use jujube_lib::repo::{MutableRepo, ReadonlyRepo, RepoLoadError, RepoLoader}; use jujube_lib::repo_path::RepoPath; use jujube_lib::revset::{RevsetError, RevsetParseError}; +use jujube_lib::revset_graph_iterator::RevsetGraphEdgeType; use jujube_lib::rewrite::{back_out_commit, merge_commit_trees, rebase_commit}; use jujube_lib::settings::UserSettings; use jujube_lib::store::{StoreError, Timestamp, TreeValue}; @@ -1061,11 +1062,29 @@ fn cmd_log( let revset = revset::evaluate_expression(repo.as_repo_ref(), &revset_expression)?; if use_graph { let mut graph = AsciiGraphDrawer::new(&mut styler); - for index_entry in revset.iter() { - let mut edges = vec![]; - for parent_position in index_entry.parent_positions() { - // TODO: Use the right kind of edge here. - edges.push(Edge::direct(parent_position)); + for (index_entry, edges) in revset.iter().graph() { + let mut graphlog_edges = vec![]; + // TODO: Should we update RevsetGraphIterator to yield this flag instead of all + // the missing edges since we don't care about where they point here + // anyway? + let mut has_missing = false; + for edge in edges { + match edge.edge_type { + RevsetGraphEdgeType::Missing => { + has_missing = true; + } + RevsetGraphEdgeType::Direct => graphlog_edges.push(Edge::Present { + direct: true, + target: edge.target, + }), + RevsetGraphEdgeType::Indirect => graphlog_edges.push(Edge::Present { + direct: false, + target: edge.target, + }), + } + } + if has_missing { + graphlog_edges.push(Edge::Missing); } let mut buffer = vec![]; // TODO: only use color if requested @@ -1078,7 +1097,7 @@ fn cmd_log( if !buffer.ends_with(b"\n") { buffer.push(b'\n'); } - graph.add_node(&index_entry.position(), &edges, b"o", &buffer)?; + graph.add_node(&index_entry.position(), &graphlog_edges, b"o", &buffer)?; } } else { for index_entry in revset.iter() {