From 12d7f8be169164d31f0b691500eeaf84b75eeeb7 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Sat, 25 Nov 2023 20:07:22 +0900 Subject: [PATCH] repo_path: turn RepoPath into String wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RepoPath::from_components() is removed since it is no longer a primitive function. The components iterator could be implemented on top of str::split(), but it's not as we'll probably want to add components.as_path() -> &RepoPath. Tree walking and tree_states map construction get slightly faster thanks to fewer allocations and/or better cache locality. If we add a borrowed RepoPath type, we can also implement a cheap &str to &RepoPath conversion on top. Then, we can get rid of BTreeMap construction at all. Snapshot without watchman: ``` % hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1 \ "target/release-with-debug/{bin} -R ~/mirrors/linux status" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux status Time (mean ± σ): 950.1 ms ± 24.9 ms [User: 1642.4 ms, System: 681.1 ms] Range (min … max): 913.8 ms … 990.9 ms 10 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux status Time (mean ± σ): 872.1 ms ± 14.5 ms [User: 1922.3 ms, System: 625.8 ms] Range (min … max): 853.2 ms … 895.9 ms 10 runs Relative speed comparison 1.09 ± 0.03 target/release-with-debug/jj-0 -R ~/mirrors/linux status 1.00 target/release-with-debug/jj-1 -R ~/mirrors/linux status ``` Tree walk: ``` % hyperfine --sort command --warmup 3 --runs 10 -L bin jj-0,jj-1 \ "target/release-with-debug/{bin} -R ~/mirrors/linux files --ignore-working-copy" Benchmark 1: target/release-with-debug/jj-0 -R ~/mirrors/linux files --ignore-working-copy Time (mean ± σ): 375.3 ms ± 15.4 ms [User: 223.3 ms, System: 151.8 ms] Range (min … max): 359.4 ms … 394.1 ms 10 runs Benchmark 2: target/release-with-debug/jj-1 -R ~/mirrors/linux files --ignore-working-copy Time (mean ± σ): 357.1 ms ± 16.2 ms [User: 214.7 ms, System: 142.6 ms] Range (min … max): 341.6 ms … 378.9 ms 10 runs Relative speed comparison 1.05 ± 0.06 target/release-with-debug/jj-0 -R ~/mirrors/linux files --ignore-working-copy 1.00 target/release-with-debug/jj-1 -R ~/mirrors/linux files --ignore-working-copy ``` --- lib/src/repo_path.rs | 205 ++++++++++++++++++++++++---------- lib/tests/test_merge_trees.rs | 2 +- 2 files changed, 146 insertions(+), 61 deletions(-) diff --git a/lib/src/repo_path.rs b/lib/src/repo_path.rs index 7e8fcd0dd..04dd0332a 100644 --- a/lib/src/repo_path.rs +++ b/lib/src/repo_path.rs @@ -15,11 +15,12 @@ #![allow(missing_docs)] use std::borrow::Borrow; +use std::cmp::Ordering; use std::fmt::{Debug, Error, Formatter}; +use std::iter::FusedIterator; use std::ops::Deref; use std::path::{Component, Path, PathBuf}; -use itertools::Itertools; use ref_cast::{ref_cast_custom, RefCastCustom}; use thiserror::Error; @@ -112,25 +113,68 @@ impl ToOwned for RepoPathComponent { } /// Iterator over `RepoPath` components. -pub type RepoPathComponentsIter<'a> = std::iter::Map< - std::slice::Iter<'a, RepoPathComponentBuf>, - fn(&RepoPathComponentBuf) -> &RepoPathComponent, ->; +#[derive(Clone, Debug)] +pub struct RepoPathComponentsIter<'a> { + value: &'a str, +} -#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +impl RepoPathComponentsIter<'_> { + // TODO: add borrowed RepoPath type and implement as_path() instead + fn to_path(&self) -> RepoPath { + RepoPath { + value: self.value.to_owned(), + } + } +} + +impl<'a> Iterator for RepoPathComponentsIter<'a> { + type Item = &'a RepoPathComponent; + + fn next(&mut self) -> Option { + if self.value.is_empty() { + return None; + } + let (name, remainder) = self + .value + .split_once('/') + .unwrap_or_else(|| (&self.value, &self.value[self.value.len()..])); + self.value = remainder; + Some(RepoPathComponent::new_unchecked(name)) + } +} + +impl DoubleEndedIterator for RepoPathComponentsIter<'_> { + fn next_back(&mut self) -> Option { + if self.value.is_empty() { + return None; + } + let (remainder, name) = self + .value + .rsplit_once('/') + .unwrap_or_else(|| (&self.value[..0], &self.value)); + self.value = remainder; + Some(RepoPathComponent::new_unchecked(name)) + } +} + +impl FusedIterator for RepoPathComponentsIter<'_> {} + +#[derive(Clone, Eq, Hash, PartialEq)] pub struct RepoPath { - components: Vec, + value: String, } impl Debug for RepoPath { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - f.write_fmt(format_args!("{:?}", &self.to_internal_file_string())) + f.write_fmt(format_args!("{:?}", &self.value)) } } impl RepoPath { - pub fn root() -> Self { - RepoPath { components: vec![] } + pub const fn root() -> Self { + RepoPath { + value: String::new(), + } } /// Creates `RepoPath` from valid string representation. @@ -139,16 +183,8 @@ impl RepoPath { /// `"/"`, `"/foo"`, `"foo/"`, `"foo//bar"` are all invalid. pub fn from_internal_string(value: &str) -> Self { assert!(is_valid_repo_path_str(value)); - if value.is_empty() { - RepoPath::root() - } else { - let components = value - .split('/') - .map(|value| RepoPathComponentBuf { - value: value.to_string(), - }) - .collect(); - RepoPath { components } + RepoPath { + value: value.to_owned(), } } @@ -157,19 +193,23 @@ impl RepoPath { /// The input path should not contain `.` or `..`. pub fn from_relative_path(relative_path: impl AsRef) -> Option { let relative_path = relative_path.as_ref(); - let components = relative_path + let mut components = relative_path .components() .map(|c| match c { - Component::Normal(a) => Some(RepoPathComponentBuf::from(a.to_str().unwrap())), + Component::Normal(name) => Some(name.to_str().unwrap()), // TODO: better to return Err instead of None? _ => None, }) - .collect::>()?; - Some(RepoPath::from_components(components)) - } - - pub fn from_components(components: Vec) -> Self { - RepoPath { components } + .fuse(); + let mut value = String::with_capacity(relative_path.as_os_str().len()); + if let Some(name) = components.next() { + value.push_str(name?); + } + for name in components { + value.push('/'); + value.push_str(name?); + } + Some(RepoPath { value }) } /// Parses an `input` path into a `RepoPath` relative to `base`. @@ -197,67 +237,85 @@ impl RepoPath { /// trailing slash, unless this path represents the root directory. That /// way it can be concatenated with a basename and produce a valid path. pub fn to_internal_dir_string(&self) -> String { - let mut result = String::new(); - for component in &self.components { - result.push_str(component.as_str()); - result.push('/'); + if self.value.is_empty() { + String::new() + } else { + [&self.value, "/"].concat() } - result } /// The full string form used internally, not for presenting to users (where /// we may want to use the platform's separator). pub fn to_internal_file_string(&self) -> String { - let strings = self - .components - .iter() - .map(|component| component.value.clone()) - .collect_vec(); - strings.join("/") + self.value.to_owned() } pub fn to_fs_path(&self, base: &Path) -> PathBuf { - let repo_path_len: usize = self.components.iter().map(|x| x.as_str().len() + 1).sum(); - let mut result = PathBuf::with_capacity(base.as_os_str().len() + repo_path_len); + let mut result = PathBuf::with_capacity(base.as_os_str().len() + self.value.len() + 1); result.push(base); - result.extend(self.components.iter().map(|dir| &dir.value)); + result.extend(self.components().map(RepoPathComponent::as_str)); result } pub fn is_root(&self) -> bool { - self.components.is_empty() + self.value.is_empty() } + // TODO: might be better to add .starts_with() instead pub fn contains(&self, other: &RepoPath) -> bool { - other.components.starts_with(&self.components) + other.strip_prefix(self).is_some() + } + + // TODO: make it return borrowed RepoPath type + fn strip_prefix(&self, base: &RepoPath) -> Option<&str> { + if base.value.is_empty() { + Some(&self.value) + } else { + let tail = self.value.strip_prefix(&base.value)?; + if tail.is_empty() { + Some(tail) + } else { + tail.strip_prefix('/') + } + } } pub fn parent(&self) -> Option { - if self.is_root() { - None - } else { - Some(RepoPath { - components: self.components[0..self.components.len() - 1].to_vec(), - }) - } + self.split().map(|(parent, _)| parent) } pub fn split(&self) -> Option<(RepoPath, &RepoPathComponent)> { - if self.is_root() { - None - } else { - Some((self.parent().unwrap(), self.components.last().unwrap())) - } + let mut components = self.components(); + let basename = components.next_back()?; + Some((components.to_path(), basename)) } pub fn components(&self) -> RepoPathComponentsIter<'_> { - self.components.iter().map(AsRef::as_ref) + RepoPathComponentsIter { value: &self.value } } pub fn join(&self, entry: &RepoPathComponent) -> RepoPath { - let components = - itertools::chain(self.components.iter().cloned(), [entry.to_owned()]).collect(); - RepoPath { components } + let value = if self.value.is_empty() { + entry.as_str().to_owned() + } else { + [&self.value, "/", entry.as_str()].concat() + }; + RepoPath { value } + } +} + +impl Ord for RepoPath { + fn cmp(&self, other: &Self) -> Ordering { + // If there were leading/trailing slash, components-based Ord would + // disagree with str-based Eq. + debug_assert!(is_valid_repo_path_str(&self.value)); + self.components().cmp(other.components()) + } +} + +impl PartialOrd for RepoPath { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) } } @@ -279,6 +337,8 @@ fn is_valid_repo_path_str(value: &str) -> bool { mod tests { use std::panic; + use itertools::Itertools as _; + use super::*; fn repo_path(value: &str) -> RepoPath { @@ -308,6 +368,31 @@ mod tests { assert_eq!(repo_path("dir/file").to_internal_file_string(), "dir/file"); } + #[test] + fn test_to_internal_dir_string() { + assert_eq!(RepoPath::root().to_internal_dir_string(), ""); + assert_eq!(repo_path("dir").to_internal_dir_string(), "dir/"); + assert_eq!(repo_path("dir/file").to_internal_dir_string(), "dir/file/"); + } + + #[test] + fn test_contains() { + assert!(repo_path("").contains(&repo_path(""))); + assert!(repo_path("").contains(&repo_path("x"))); + assert!(!repo_path("x").contains(&repo_path(""))); + + assert!(repo_path("x").contains(&repo_path("x"))); + assert!(repo_path("x").contains(&repo_path("x/y"))); + assert!(!repo_path("x").contains(&repo_path("xy"))); + assert!(!repo_path("y").contains(&repo_path("x/y"))); + + assert!(repo_path("x/y").contains(&repo_path("x/y"))); + assert!(repo_path("x/y").contains(&repo_path("x/y/z"))); + assert!(!repo_path("x/y").contains(&repo_path("x/yz"))); + assert!(!repo_path("x/y").contains(&repo_path("x"))); + assert!(!repo_path("x/y").contains(&repo_path("xy"))); + } + #[test] fn test_order() { assert!(RepoPath::root() < repo_path("dir")); diff --git a/lib/tests/test_merge_trees.rs b/lib/tests/test_merge_trees.rs index fcea5d8d4..0b139bbfd 100644 --- a/lib/tests/test_merge_trees.rs +++ b/lib/tests/test_merge_trees.rs @@ -495,7 +495,7 @@ fn test_simplify_conflict() { match further_rebased_tree.value(component).unwrap() { TreeValue::Conflict(id) => { let conflict = store - .read_conflict(&RepoPath::from_components(vec![component.to_owned()]), id) + .read_conflict(&RepoPath::from_internal_string(component.as_str()), id) .unwrap(); assert_eq!( conflict.removes().map(|v| v.as_ref()).collect_vec(),