merged_tree: do not re-look up non-conflicting tree values by name

While measuring file(path) query, I noticed BTreeMap lookup appears in perf.
It actually has a measurable cost if the history is linear and parent trees
don't have to be merged dynamically. For merge-heavy history, the cost of
tree merges is more significant. I'll address that separately.

```
% hyperfine --sort command --warmup 3 --runs 50 -L bin jj-1,jj-2 \
  'target/release-with-debug/{bin} -R ~/mirrors/git --ignore-working-copy \
   log -r "::trunk() & ~merges() & file(root:builtin)" --no-graph -n100'
Benchmark 1: target/release-with-debug/jj-1 ..
  Time (mean ± σ):     239.7 ms ±   7.1 ms    [User: 192.1 ms, System: 46.5 ms]
  Range (min … max):   222.2 ms … 249.7 ms    50 runs

Benchmark 2: target/release-with-debug/jj-2 ..
  Time (mean ± σ):     201.7 ms ±   6.9 ms    [User: 153.7 ms, System: 46.6 ms]
  Range (min … max):   184.2 ms … 211.1 ms    50 runs

Relative speed comparison
        1.19 ±  0.05  target/release-with-debug/jj-1 ..
        1.00          target/release-with-debug/jj-2 ..
```
This commit is contained in:
Yuya Nishihara 2024-08-08 19:35:46 +09:00
parent 9fb9e732c1
commit 2cb7e91dc7

View file

@ -22,10 +22,11 @@ use std::sync::Arc;
use std::task::{Context, Poll}; use std::task::{Context, Poll};
use std::{iter, vec}; use std::{iter, vec};
use either::Either;
use futures::future::BoxFuture; use futures::future::BoxFuture;
use futures::stream::{BoxStream, StreamExt}; use futures::stream::{BoxStream, StreamExt};
use futures::{Stream, TryStreamExt}; use futures::{Stream, TryStreamExt};
use itertools::Itertools; use itertools::{EitherOrBoth, Itertools};
use crate::backend; use crate::backend;
use crate::backend::{BackendResult, MergedTreeId, TreeId, TreeValue}; use crate::backend::{BackendResult, MergedTreeId, TreeId, TreeValue};
@ -333,28 +334,36 @@ fn all_tree_basenames(trees: &Merge<Tree>) -> impl Iterator<Item = &RepoPathComp
.dedup() .dedup()
} }
fn merged_tree_basenames<'a>( fn all_tree_entries(
trees1: &'a Merge<Tree>, trees: &Merge<Tree>,
trees2: &'a Merge<Tree>, ) -> impl Iterator<Item = (&RepoPathComponent, MergedTreeVal<'_>)> {
) -> Box<dyn Iterator<Item = &'a RepoPathComponent> + 'a> { if let Some(tree) = trees.as_resolved() {
fn merge_iters<'a>( let iter = tree
iter1: impl Iterator<Item = &'a RepoPathComponent> + 'a, .entries_non_recursive()
iter2: impl Iterator<Item = &'a RepoPathComponent> + 'a, .map(|entry| (entry.name(), MergedTreeVal::Resolved(Some(entry.value()))));
) -> Box<dyn Iterator<Item = &'a RepoPathComponent> + 'a> { Either::Left(iter)
Box::new(iter1.merge(iter2).dedup()) } else {
// TODO: reimplement as entries iterator?
let iter = all_tree_basenames(trees).map(|name| (name, trees_value(trees, name)));
Either::Right(iter)
} }
merge_iters(all_tree_basenames(trees1), all_tree_basenames(trees2))
} }
fn merged_tree_entry_diff<'a>( fn merged_tree_entry_diff<'a>(
before: &'a Merge<Tree>, trees1: &'a Merge<Tree>,
after: &'a Merge<Tree>, trees2: &'a Merge<Tree>,
) -> impl Iterator<Item = (&'a RepoPathComponent, MergedTreeVal<'a>, MergedTreeVal<'a>)> { ) -> impl Iterator<Item = (&'a RepoPathComponent, MergedTreeVal<'a>, MergedTreeVal<'a>)> {
merged_tree_basenames(before, after).filter_map(|basename| { itertools::merge_join_by(
let value_before = trees_value(before, basename); all_tree_entries(trees1),
let value_after = trees_value(after, basename); all_tree_entries(trees2),
(value_after != value_before).then_some((basename, value_before, value_after)) |(name1, _), (name2, _)| name1.cmp(name2),
)
.map(|entry| match entry {
EitherOrBoth::Both((name, value1), (_, value2)) => (name, value1, value2),
EitherOrBoth::Left((name, value1)) => (name, value1, MergedTreeVal::Resolved(None)),
EitherOrBoth::Right((name, value2)) => (name, MergedTreeVal::Resolved(None), value2),
}) })
.filter(|(_, value1, value2)| value1 != value2)
} }
fn trees_value<'a>(trees: &'a Merge<Tree>, basename: &RepoPathComponent) -> MergedTreeVal<'a> { fn trees_value<'a>(trees: &'a Merge<Tree>, basename: &RepoPathComponent) -> MergedTreeVal<'a> {
@ -383,6 +392,8 @@ fn merge_trees(merge: &Merge<Tree>) -> BackendResult<Merge<Tree>> {
// any conflicts. // any conflicts.
let mut new_tree = backend::Tree::default(); let mut new_tree = backend::Tree::default();
let mut conflicts = vec![]; let mut conflicts = vec![];
// TODO: add all_tree_entries()-like function that doesn't change the arity
// of conflicts?
for basename in all_tree_basenames(merge) { for basename in all_tree_basenames(merge) {
let path_merge = merge.map(|tree| tree.value(basename).cloned()); let path_merge = merge.map(|tree| tree.value(basename).cloned());
let path = dir.join(basename); let path = dir.join(basename);
@ -467,9 +478,9 @@ impl TreeEntriesDirItem {
fn new(trees: &Merge<Tree>, matcher: &dyn Matcher) -> Self { fn new(trees: &Merge<Tree>, matcher: &dyn Matcher) -> Self {
let mut entries = vec![]; let mut entries = vec![];
let dir = trees.first().dir(); let dir = trees.first().dir();
for name in all_tree_basenames(trees) { for (name, value) in all_tree_entries(trees) {
let path = dir.join(name); let path = dir.join(name);
let value = trees_value(trees, name).to_merge(); let value = value.to_merge();
if value.is_tree() { if value.is_tree() {
// TODO: Handle the other cases (specific files and trees) // TODO: Handle the other cases (specific files and trees)
if matcher.visit(&path).is_nothing() { if matcher.visit(&path).is_nothing() {
@ -533,8 +544,8 @@ impl From<&Merge<Tree>> for ConflictsDirItem {
} }
let mut entries = vec![]; let mut entries = vec![];
for basename in all_tree_basenames(trees) { for (basename, value) in all_tree_entries(trees) {
match trees_value(trees, basename) { match value {
MergedTreeVal::Resolved(_) => {} MergedTreeVal::Resolved(_) => {}
MergedTreeVal::Conflict(tree_values) => { MergedTreeVal::Conflict(tree_values) => {
entries.push((dir.join(basename), tree_values)); entries.push((dir.join(basename), tree_values));