From 2cb7e91dc7e78d7076440e0dea595b905a50f0b2 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Thu, 8 Aug 2024 19:35:46 +0900 Subject: [PATCH] merged_tree: do not re-look up non-conflicting tree values by name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While measuring file(path) query, I noticed BTreeMap lookup appears in perf. It actually has a measurable cost if the history is linear and parent trees don't have to be merged dynamically. For merge-heavy history, the cost of tree merges is more significant. I'll address that separately. ``` % hyperfine --sort command --warmup 3 --runs 50 -L bin jj-1,jj-2 \ 'target/release-with-debug/{bin} -R ~/mirrors/git --ignore-working-copy \ log -r "::trunk() & ~merges() & file(root:builtin)" --no-graph -n100' Benchmark 1: target/release-with-debug/jj-1 .. Time (mean ± σ): 239.7 ms ± 7.1 ms [User: 192.1 ms, System: 46.5 ms] Range (min … max): 222.2 ms … 249.7 ms 50 runs Benchmark 2: target/release-with-debug/jj-2 .. Time (mean ± σ): 201.7 ms ± 6.9 ms [User: 153.7 ms, System: 46.6 ms] Range (min … max): 184.2 ms … 211.1 ms 50 runs Relative speed comparison 1.19 ± 0.05 target/release-with-debug/jj-1 .. 1.00 target/release-with-debug/jj-2 .. ``` --- lib/src/merged_tree.rs | 53 +++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 21 deletions(-) diff --git a/lib/src/merged_tree.rs b/lib/src/merged_tree.rs index abb452077..621465495 100644 --- a/lib/src/merged_tree.rs +++ b/lib/src/merged_tree.rs @@ -22,10 +22,11 @@ use std::sync::Arc; use std::task::{Context, Poll}; use std::{iter, vec}; +use either::Either; use futures::future::BoxFuture; use futures::stream::{BoxStream, StreamExt}; use futures::{Stream, TryStreamExt}; -use itertools::Itertools; +use itertools::{EitherOrBoth, Itertools}; use crate::backend; use crate::backend::{BackendResult, MergedTreeId, TreeId, TreeValue}; @@ -333,28 +334,36 @@ fn all_tree_basenames(trees: &Merge) -> impl Iterator( - trees1: &'a Merge, - trees2: &'a Merge, -) -> Box + 'a> { - fn merge_iters<'a>( - iter1: impl Iterator + 'a, - iter2: impl Iterator + 'a, - ) -> Box + 'a> { - Box::new(iter1.merge(iter2).dedup()) +fn all_tree_entries( + trees: &Merge, +) -> impl Iterator)> { + if let Some(tree) = trees.as_resolved() { + let iter = tree + .entries_non_recursive() + .map(|entry| (entry.name(), MergedTreeVal::Resolved(Some(entry.value())))); + Either::Left(iter) + } else { + // TODO: reimplement as entries iterator? + let iter = all_tree_basenames(trees).map(|name| (name, trees_value(trees, name))); + Either::Right(iter) } - merge_iters(all_tree_basenames(trees1), all_tree_basenames(trees2)) } fn merged_tree_entry_diff<'a>( - before: &'a Merge, - after: &'a Merge, + trees1: &'a Merge, + trees2: &'a Merge, ) -> impl Iterator, MergedTreeVal<'a>)> { - merged_tree_basenames(before, after).filter_map(|basename| { - let value_before = trees_value(before, basename); - let value_after = trees_value(after, basename); - (value_after != value_before).then_some((basename, value_before, value_after)) + itertools::merge_join_by( + all_tree_entries(trees1), + all_tree_entries(trees2), + |(name1, _), (name2, _)| name1.cmp(name2), + ) + .map(|entry| match entry { + EitherOrBoth::Both((name, value1), (_, value2)) => (name, value1, value2), + EitherOrBoth::Left((name, value1)) => (name, value1, MergedTreeVal::Resolved(None)), + EitherOrBoth::Right((name, value2)) => (name, MergedTreeVal::Resolved(None), value2), }) + .filter(|(_, value1, value2)| value1 != value2) } fn trees_value<'a>(trees: &'a Merge, basename: &RepoPathComponent) -> MergedTreeVal<'a> { @@ -383,6 +392,8 @@ fn merge_trees(merge: &Merge) -> BackendResult> { // any conflicts. let mut new_tree = backend::Tree::default(); let mut conflicts = vec![]; + // TODO: add all_tree_entries()-like function that doesn't change the arity + // of conflicts? for basename in all_tree_basenames(merge) { let path_merge = merge.map(|tree| tree.value(basename).cloned()); let path = dir.join(basename); @@ -467,9 +478,9 @@ impl TreeEntriesDirItem { fn new(trees: &Merge, matcher: &dyn Matcher) -> Self { let mut entries = vec![]; let dir = trees.first().dir(); - for name in all_tree_basenames(trees) { + for (name, value) in all_tree_entries(trees) { let path = dir.join(name); - let value = trees_value(trees, name).to_merge(); + let value = value.to_merge(); if value.is_tree() { // TODO: Handle the other cases (specific files and trees) if matcher.visit(&path).is_nothing() { @@ -533,8 +544,8 @@ impl From<&Merge> for ConflictsDirItem { } let mut entries = vec![]; - for basename in all_tree_basenames(trees) { - match trees_value(trees, basename) { + for (basename, value) in all_tree_entries(trees) { + match value { MergedTreeVal::Resolved(_) => {} MergedTreeVal::Conflict(tree_values) => { entries.push((dir.join(basename), tree_values));