From ee6a1e2c0a7004e79186d6727e2acdf4cd4ea680 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 18 Nov 2023 21:20:49 +0900 Subject: [PATCH] working_copy: don't build intermediate HashMap from proto file states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the doc, this is compatible with the map syntax. https://protobuf.dev/programming-guides/proto3/#maps This change means that the serialized file states are sorted by RepoPath, so BTreeMap can be reconstructed with fewer cache misses. In my "linux" repo (watchman enabled): - jj-0: baseline - jj-1: this % hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1,jj-2 \ "target/release-with-debug/{bin} -R ~/mirrors/linux status" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux status Time (mean ± σ): 1.034 s ± 0.020 s [User: 0.881 s, System: 0.212 s] Range (min … max): 1.011 s … 1.068 s 10 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status Time (mean ± σ): 849.3 ms ± 13.8 ms [User: 710.7 ms, System: 199.3 ms] Range (min … max): 821.7 ms … 870.2 ms 10 runs Relative speed comparison 1.32 ± 0.04 target/release-with-debug/jj-0 -R ~/mirrors/linux status 1.08 ± 0.03 target/release-with-debug/jj-1 -R ~/mirrors/linux status Cache-misses got reduced: % perf stat -e task-clock,cycles,instructions,cache-references,cache-misses \ -- ./target/release-with-debug/jj-0 -R ~/mirrors/linux --no-pager status 1,091.68 msec task-clock # 1.032 CPUs utilized 4,179,596,978 cycles # 3.829 GHz 6,166,231,489 instructions # 1.48 insn per cycle 134,032,047 cache-references # 122.776 M/sec 29,322,707 cache-misses # 21.88% of all cache refs 1.057474164 seconds time elapsed 0.897042000 seconds user 0.194819000 seconds sys % perf stat -e task-clock,cycles,instructions,cache-references,cache-misses \ -- ./target/release-with-debug/jj-1 -R ~/mirrors/linux --no-pager status 927.05 msec task-clock # 1.083 CPUs utilized 3,451,299,198 cycles # 3.723 GHz 6,222,418,272 instructions # 1.80 insn per cycle 98,499,363 cache-references # 106.251 M/sec 11,998,523 cache-misses # 12.18% of all cache refs 0.855938336 seconds time elapsed 0.720568000 seconds user 0.207924000 seconds sys --- lib/src/local_working_copy.rs | 26 +++++++++++++++----------- lib/src/protos/working_copy.proto | 7 ++++++- lib/src/protos/working_copy.rs | 15 ++++++++++----- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/lib/src/local_working_copy.rs b/lib/src/local_working_copy.rs index 3951c3ad3..bc7c34bcc 100644 --- a/lib/src/local_working_copy.rs +++ b/lib/src/local_working_copy.rs @@ -15,7 +15,7 @@ #![allow(missing_docs)] use std::any::Any; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::error::Error; use std::fs; use std::fs::{File, Metadata, OpenOptions}; @@ -195,12 +195,12 @@ fn file_state_to_proto(file_state: &FileState) -> crate::protos::working_copy::F } fn file_states_from_proto( - proto: &HashMap, + proto: &[crate::protos::working_copy::FileStateEntry], ) -> BTreeMap { let mut file_states = BTreeMap::new(); - for (path_str, proto_file_state) in proto { - let path = RepoPath::from_internal_string(path_str); - file_states.insert(path, file_state_from_proto(proto_file_state)); + for entry in proto { + let path = RepoPath::from_internal_string(&entry.path); + file_states.insert(path, file_state_from_proto(entry.state.as_ref().unwrap())); } file_states } @@ -465,12 +465,16 @@ impl TreeState { } } - for (file, file_state) in &self.file_states { - proto.file_states.insert( - file.to_internal_file_string(), - file_state_to_proto(file_state), - ); - } + proto.file_states = self + .file_states + .iter() + .map( + |(path, state)| crate::protos::working_copy::FileStateEntry { + path: path.to_internal_file_string(), + state: Some(file_state_to_proto(state)), + }, + ) + .collect(); let mut sparse_patterns = crate::protos::working_copy::SparsePatterns::default(); for path in &self.sparse_patterns { sparse_patterns diff --git a/lib/src/protos/working_copy.proto b/lib/src/protos/working_copy.proto index 3220e9ea2..1fc658b48 100644 --- a/lib/src/protos/working_copy.proto +++ b/lib/src/protos/working_copy.proto @@ -32,6 +32,11 @@ message FileState { bytes conflict_id = 4 [deprecated = true]; } +message FileStateEntry { + string path = 1; + FileState state = 2; +} + message SparsePatterns { repeated string prefixes = 1; } @@ -41,7 +46,7 @@ message TreeState { // Alternating positive and negative terms if there's a conflict, otherwise a // single (positive) value repeated bytes tree_ids = 5; - map file_states = 2; + repeated FileStateEntry file_states = 2; SparsePatterns sparse_patterns = 3; WatchmanClock watchman_clock = 4; } diff --git a/lib/src/protos/working_copy.rs b/lib/src/protos/working_copy.rs index 30d7b82f7..7d3016545 100644 --- a/lib/src/protos/working_copy.rs +++ b/lib/src/protos/working_copy.rs @@ -14,6 +14,14 @@ pub struct FileState { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct FileStateEntry { + #[prost(string, tag = "1")] + pub path: ::prost::alloc::string::String, + #[prost(message, optional, tag = "2")] + pub state: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct SparsePatterns { #[prost(string, repeated, tag = "1")] pub prefixes: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, @@ -27,11 +35,8 @@ pub struct TreeState { /// single (positive) value #[prost(bytes = "vec", repeated, tag = "5")] pub tree_ids: ::prost::alloc::vec::Vec<::prost::alloc::vec::Vec>, - #[prost(map = "string, message", tag = "2")] - pub file_states: ::std::collections::HashMap< - ::prost::alloc::string::String, - FileState, - >, + #[prost(message, repeated, tag = "2")] + pub file_states: ::prost::alloc::vec::Vec, #[prost(message, optional, tag = "3")] pub sparse_patterns: ::core::option::Option, #[prost(message, optional, tag = "4")]