From 817713c921f8def98ad3fd7c8021f4edbc6ca76c Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Mon, 24 Jul 2023 00:31:00 +0900 Subject: [PATCH] graphlog: use IndexPosition until transitive edges get eliminated This partially reverts 4c8f484278de "graphlog: key by commit id (not index position)." As Martin pointed out, it made "log -r 'tags()' -T.." in git repo super slow. Apparently, both clone() and hash map insertion/lookup costs increased by that change. Since we don't need CommitId inside the graph iterator, we can simply replace it with IndexPosition, and resolve it to CommitId later. --- lib/src/default_revset_engine.rs | 4 +- lib/src/default_revset_graph_iterator.rs | 101 +++++++++++++++-------- lib/src/revset.rs | 2 +- 3 files changed, 68 insertions(+), 39 deletions(-) diff --git a/lib/src/default_revset_engine.rs b/lib/src/default_revset_engine.rs index 31bd2173f..1fcb0fcf0 100644 --- a/lib/src/default_revset_engine.rs +++ b/lib/src/default_revset_engine.rs @@ -78,7 +78,7 @@ impl<'index> RevsetImpl<'index> { } pub fn iter_graph_impl(&self) -> RevsetGraphIterator<'_, 'index> { - RevsetGraphIterator::new(self.inner.iter()) + RevsetGraphIterator::new(self.index, self.inner.iter()) } } @@ -104,7 +104,7 @@ impl<'index> Revset<'index> for RevsetImpl<'index> { } fn iter_graph(&self) -> Box)> + '_> { - Box::new(RevsetGraphIterator::new(self.inner.iter())) + Box::new(self.iter_graph_impl()) } fn change_id_index(&self) -> Box { diff --git a/lib/src/default_revset_graph_iterator.rs b/lib/src/default_revset_graph_iterator.rs index fe483a53d..8933e67eb 100644 --- a/lib/src/default_revset_graph_iterator.rs +++ b/lib/src/default_revset_graph_iterator.rs @@ -14,13 +14,46 @@ #![allow(missing_docs)] -use std::cmp::min; +use std::cmp::{min, Reverse}; use std::collections::{BTreeMap, HashSet}; use crate::backend::CommitId; -use crate::default_index_store::{IndexEntry, IndexPosition}; +use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition}; use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType}; +/// Like `RevsetGraphEdge`, but stores `IndexPosition` instead. +/// +/// This can be cheaply allocated and hashed compared to `CommitId`-based type. +#[derive(Clone, Debug, Eq, Hash, PartialEq)] +struct IndexGraphEdge { + target: IndexPosition, + edge_type: RevsetGraphEdgeType, +} + +impl IndexGraphEdge { + fn missing(target: IndexPosition) -> Self { + let edge_type = RevsetGraphEdgeType::Missing; + IndexGraphEdge { target, edge_type } + } + + fn direct(target: IndexPosition) -> Self { + let edge_type = RevsetGraphEdgeType::Direct; + IndexGraphEdge { target, edge_type } + } + + fn indirect(target: IndexPosition) -> Self { + let edge_type = RevsetGraphEdgeType::Indirect; + IndexGraphEdge { target, edge_type } + } + + fn to_revset_edge(&self, index: CompositeIndex<'_>) -> RevsetGraphEdge { + RevsetGraphEdge { + target: index.entry_by_pos(self.target).commit_id(), + edge_type: self.edge_type, + } + } +} + /// Given an iterator over some set of revisions, yields the same revisions with /// associated edge types. /// @@ -88,6 +121,7 @@ use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType}; /// could lead to "D", but that would require extra book-keeping to remember for /// later that the edges from "f" and "H" are only partially computed. pub struct RevsetGraphIterator<'revset, 'index> { + index: CompositeIndex<'index>, input_set_iter: Box> + 'revset>, /// Commits in the input set we had to take out of the iterator while /// walking external edges. Does not necessarily include the commit @@ -98,15 +132,17 @@ pub struct RevsetGraphIterator<'revset, 'index> { min_position: IndexPosition, /// Edges for commits not in the input set. // TODO: Remove unneeded entries here as we go (that's why it's an ordered map)? - edges: BTreeMap>, + edges: BTreeMap>, skip_transitive_edges: bool, } impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { pub fn new( + index: CompositeIndex<'index>, input_set_iter: Box> + 'revset>, ) -> RevsetGraphIterator<'revset, 'index> { RevsetGraphIterator { + index, input_set_iter, look_ahead: Default::default(), min_position: IndexPosition::MAX, @@ -130,24 +166,23 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { fn edges_from_internal_commit( &mut self, index_entry: &IndexEntry<'index>, - ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { + ) -> HashSet { if let Some(edges) = self.edges.get(&index_entry.position()) { return edges.clone(); } let mut edges = HashSet::new(); for parent in index_entry.parents() { let parent_position = parent.position(); - let parent_commit_id = parent.commit_id(); self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { - edges.insert((parent_position, RevsetGraphEdge::direct(parent_commit_id))); + edges.insert(IndexGraphEdge::direct(parent_position)); } else { let parent_edges = self.edges_from_external_commit(parent); if parent_edges .iter() - .all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing) + .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) { - edges.insert((parent_position, RevsetGraphEdge::missing(parent_commit_id))); + edges.insert(IndexGraphEdge::missing(parent_position)); } else { edges.extend(parent_edges); } @@ -160,7 +195,7 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { fn edges_from_external_commit( &mut self, index_entry: IndexEntry<'index>, - ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { + ) -> HashSet { let position = index_entry.position(); let mut stack = vec![index_entry]; while let Some(entry) = stack.last() { @@ -172,28 +207,19 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { self.consume_to(parent_position); if self.look_ahead.contains_key(&parent_position) { // We have found a path back into the input set - edges.insert(( - parent_position, - RevsetGraphEdge::indirect(parent.commit_id()), - )); + edges.insert(IndexGraphEdge::indirect(parent_position)); } else if let Some(parent_edges) = self.edges.get(&parent_position) { if parent_edges .iter() - .all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing) + .all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing) { - edges.insert(( - parent_position, - RevsetGraphEdge::missing(parent.commit_id()), - )); + edges.insert(IndexGraphEdge::missing(parent_position)); } else { edges.extend(parent_edges.iter().cloned()); } } else if parent_position < self.min_position { // The parent is not in the input set - edges.insert(( - parent_position, - RevsetGraphEdge::missing(parent.commit_id()), - )); + edges.insert(IndexGraphEdge::missing(parent_position)); } else { // The parent is not in the input set but it's somewhere in the range // where we have commits in the input set, so continue searching. @@ -211,11 +237,11 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { fn remove_transitive_edges( &mut self, - edges: HashSet<(IndexPosition, RevsetGraphEdge)>, - ) -> HashSet<(IndexPosition, RevsetGraphEdge)> { + edges: HashSet, + ) -> HashSet { if !edges .iter() - .any(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Indirect) + .any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect) { return edges; } @@ -223,29 +249,29 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { let mut initial_targets = HashSet::new(); let mut work = vec![]; // To start with, add the edges one step after the input edges. - for (target, edge) in &edges { - initial_targets.insert(target); + for edge in &edges { + initial_targets.insert(edge.target); if edge.edge_type != RevsetGraphEdgeType::Missing { - let entry = self.look_ahead.get(target).unwrap().clone(); + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); min_generation = min(min_generation, entry.generation_number()); work.extend(self.edges_from_internal_commit(&entry)); } } // Find commits reachable transitively and add them to the `unwanted` set. let mut unwanted = HashSet::new(); - while let Some((target, edge)) = work.pop() { - if edge.edge_type == RevsetGraphEdgeType::Missing || target < self.min_position { + while let Some(edge) = work.pop() { + if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position { continue; } - if !unwanted.insert(target) { + if !unwanted.insert(edge.target) { // Already visited continue; } - if initial_targets.contains(&target) { + if initial_targets.contains(&edge.target) { // Already visited continue; } - let entry = self.look_ahead.get(&target).unwrap().clone(); + let entry = self.look_ahead.get(&edge.target).unwrap().clone(); if entry.generation_number() < min_generation { continue; } @@ -254,7 +280,7 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> { edges .into_iter() - .filter(|(target, _)| !unwanted.contains(target)) + .filter(|edge| !unwanted.contains(&edge.target)) .collect() } @@ -281,8 +307,11 @@ impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> { edges = self.remove_transitive_edges(edges); } let mut edges: Vec<_> = edges.into_iter().collect(); - edges.sort_by(|(target_pos1, _), (target_pos2, _)| target_pos2.cmp(target_pos1)); - let edges = edges.into_iter().map(|(_, edge)| edge).collect(); + edges.sort_unstable_by_key(|edge| Reverse(edge.target)); + let edges = edges + .iter() + .map(|edge| edge.to_revset_edge(self.index)) + .collect(); Some((index_entry.commit_id(), edges)) } } diff --git a/lib/src/revset.rs b/lib/src/revset.rs index 9d8881a55..44b516dba 100644 --- a/lib/src/revset.rs +++ b/lib/src/revset.rs @@ -2185,7 +2185,7 @@ impl RevsetGraphEdge { } } -#[derive(Debug, PartialEq, Eq, Clone, Hash)] +#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)] pub enum RevsetGraphEdgeType { Missing, Direct,