working_copy: don't build intermediate HashMap from proto file states

According to the doc, this is compatible with the map syntax.
https://protobuf.dev/programming-guides/proto3/#maps

This change means that the serialized file states are sorted by RepoPath,
so BTreeMap<RepoPath, _> can be reconstructed with fewer cache misses.

In my "linux" repo (watchman enabled):
 - jj-0: baseline
 - jj-1: this

    % hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1,jj-2 \
      "target/release-with-debug/{bin} -R ~/mirrors/linux status"
    Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux status
      Time (mean ± σ):      1.034 s ±  0.020 s    [User: 0.881 s, System: 0.212 s]
      Range (min … max):    1.011 s …  1.068 s    10 runs

    Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status
      Time (mean ± σ):     849.3 ms ±  13.8 ms    [User: 710.7 ms, System: 199.3 ms]
      Range (min … max):   821.7 ms … 870.2 ms    10 runs

    Relative speed comparison
            1.32 ±  0.04  target/release-with-debug/jj-0 -R ~/mirrors/linux status
            1.08 ±  0.03  target/release-with-debug/jj-1 -R ~/mirrors/linux status

Cache-misses got reduced:

    % perf stat -e task-clock,cycles,instructions,cache-references,cache-misses \
      -- ./target/release-with-debug/jj-0 -R ~/mirrors/linux --no-pager status

              1,091.68 msec task-clock                       #    1.032 CPUs utilized
         4,179,596,978      cycles                           #    3.829 GHz
         6,166,231,489      instructions                     #    1.48  insn per cycle
           134,032,047      cache-references                 #  122.776 M/sec
            29,322,707      cache-misses                     #   21.88% of all cache refs

           1.057474164 seconds time elapsed

           0.897042000 seconds user
           0.194819000 seconds sys

    % perf stat -e task-clock,cycles,instructions,cache-references,cache-misses \
      -- ./target/release-with-debug/jj-1 -R ~/mirrors/linux --no-pager status

                927.05 msec task-clock                       #    1.083 CPUs utilized
         3,451,299,198      cycles                           #    3.723 GHz
         6,222,418,272      instructions                     #    1.80  insn per cycle
            98,499,363      cache-references                 #  106.251 M/sec
            11,998,523      cache-misses                     #   12.18% of all cache refs

           0.855938336 seconds time elapsed

           0.720568000 seconds user
           0.207924000 seconds sys
This commit is contained in:
Yuya Nishihara 2023-11-18 21:20:49 +09:00
parent 56047cb7ec
commit ee6a1e2c0a
3 changed files with 31 additions and 17 deletions

View file

@ -15,7 +15,7 @@
#![allow(missing_docs)]
use std::any::Any;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, HashSet};
use std::error::Error;
use std::fs;
use std::fs::{File, Metadata, OpenOptions};
@ -195,12 +195,12 @@ fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::F
}
fn file_states_from_proto(
proto: &HashMap<String, crate::protos::working_copy::FileState>,
proto: &[crate::protos::working_copy::FileStateEntry],
) -> BTreeMap<RepoPath, FileState> {
let mut file_states = BTreeMap::new();
for (path_str, proto_file_state) in proto {
let path = RepoPath::from_internal_string(path_str);
file_states.insert(path, file_state_from_proto(proto_file_state));
for entry in proto {
let path = RepoPath::from_internal_string(&entry.path);
file_states.insert(path, file_state_from_proto(entry.state.as_ref().unwrap()));
}
file_states
}
@ -465,12 +465,16 @@ impl TreeState {
}
}
for (file, file_state) in &self.file_states {
proto.file_states.insert(
file.to_internal_file_string(),
file_state_to_proto(file_state),
);
}
proto.file_states = self
.file_states
.iter()
.map(
|(path, state)| crate::protos::working_copy::FileStateEntry {
path: path.to_internal_file_string(),
state: Some(file_state_to_proto(state)),
},
)
.collect();
let mut sparse_patterns = crate::protos::working_copy::SparsePatterns::default();
for path in &self.sparse_patterns {
sparse_patterns

View file

@ -32,6 +32,11 @@ message FileState {
bytes conflict_id = 4 [deprecated = true];
}
message FileStateEntry {
string path = 1;
FileState state = 2;
}
message SparsePatterns {
repeated string prefixes = 1;
}
@ -41,7 +46,7 @@ message TreeState {
// Alternating positive and negative terms if there's a conflict, otherwise a
// single (positive) value
repeated bytes tree_ids = 5;
map<string, FileState> file_states = 2;
repeated FileStateEntry file_states = 2;
SparsePatterns sparse_patterns = 3;
WatchmanClock watchman_clock = 4;
}

View file

@ -14,6 +14,14 @@ pub struct FileState {
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct FileStateEntry {
#[prost(string, tag = "1")]
pub path: ::prost::alloc::string::String,
#[prost(message, optional, tag = "2")]
pub state: ::core::option::Option<FileState>,
}
#[allow(clippy::derive_partial_eq_without_eq)]
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct SparsePatterns {
#[prost(string, repeated, tag = "1")]
pub prefixes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>,
@ -27,11 +35,8 @@ pub struct TreeState {
/// single (positive) value
#[prost(bytes = "vec", repeated, tag = "5")]
pub tree_ids: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec<u8>>,
#[prost(map = "string, message", tag = "2")]
pub file_states: ::std::collections::HashMap<
::prost::alloc::string::String,
FileState,
>,
#[prost(message, repeated, tag = "2")]
pub file_states: ::prost::alloc::vec::Vec<FileStateEntry>,
#[prost(message, optional, tag = "3")]
pub sparse_patterns: ::core::option::Option<SparsePatterns>,
#[prost(message, optional, tag = "4")]