From 6399c392fdf3f6ffd516526dee47b840a83e48cf Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Wed, 15 Nov 2023 15:02:22 +0900 Subject: [PATCH] index: make heads_pos() deduplicate entries without building separate set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is much faster (maybe because of better cache locality?) Another option is to use BTreeSet, but the BinaryHeap version is slightly faster. "bench revset" result in my linux repo: revsets/heads(tags()) --------------------- baseline 3.28 560.6±4.01ms 1 2.92 500.0±2.99ms 2 1.98 339.6±1.64ms 3 (this) 1.00 171.2±0.30ms --- lib/src/default_index_store.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/src/default_index_store.rs b/lib/src/default_index_store.rs index 29ed09b08..38151fa77 100644 --- a/lib/src/default_index_store.rs +++ b/lib/src/default_index_store.rs @@ -974,17 +974,14 @@ impl<'a> CompositeIndex<'a> { // Walk ancestors of the parents of the candidates. Remove visited commits from // set of candidates. Stop walking when we have gone past the minimum // candidate generation. - let mut visited = HashSet::new(); - while let Some(item) = work.pop() { - if !visited.insert(item.pos) { - continue; - } + while let Some(item) = dedup_pop(&mut work) { if item.generation < min_generation { break; } candidate_positions.remove(&item.pos); let entry = self.entry_by_pos(item.pos); for parent_entry in entry.parents() { + assert!(parent_entry.pos < entry.pos); work.push(IndexPositionByGeneration::from(&parent_entry)); } } @@ -1596,6 +1593,16 @@ impl RevWalkItemGenerationRange { } } +/// Removes the greatest items (including duplicates) from the heap, returns +/// one. 
+fn dedup_pop<T: Ord>(heap: &mut BinaryHeap<T>) -> Option<T> { + let item = heap.pop()?; + while heap.peek() == Some(&item) { + heap.pop().unwrap(); + } + Some(item) +} + impl IndexSegment for ReadonlyIndexImpl { fn segment_num_parent_commits(&self) -> u32 { self.num_parent_commits