From c1060610bda276dce60cff69e37f059d80b7d99a Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Thu, 13 May 2021 21:26:45 -0700 Subject: [PATCH] working copy: optimize simple case of entire directory being ignored This makes the workging copy walk skip an entire ignored directory if there are no negative patterns later in the ignore file. That speeds up `jj st` in this repo with ~13k files in `target/` from ~100 ms to ~25 ms (6.0dB). This closes issue #8. --- lib/src/gitignore.rs | 39 +++++++++++++++++++++++++++++++++++++-- lib/src/working_copy.rs | 6 ++++-- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/lib/src/gitignore.rs b/lib/src/gitignore.rs index 68ec918d5..29cbdaad2 100644 --- a/lib/src/gitignore.rs +++ b/lib/src/gitignore.rs @@ -146,7 +146,7 @@ impl GitIgnoreLine { Ok(Some(GitIgnoreLine { is_negative, regex })) } - fn matches_file(&self, path: &str) -> bool { + fn matches(&self, path: &str) -> bool { self.regex.is_match(path) } } @@ -196,12 +196,32 @@ impl GitIgnoreFile { pub fn matches_file(&self, path: &str) -> bool { // Later lines take precedence, so check them in reverse for line in self.all_lines_reversed() { - if line.matches_file(path) { + if line.matches(path) { return !line.is_negative; } } false } + + pub fn matches_all_files_in(&self, dir: &str) -> bool { + // Later lines take precedence, so check them in reverse + assert!(dir.is_empty() || dir.ends_with('/')); + for line in self.all_lines_reversed() { + // Let's say there's a "/target/" pattern and then a "!interesting" pattern + // after it, then we can't say for sure that all files in target/ match. + // TODO: This can be smarter. For example, if there's a pattern "/foo/" followed + // by "!/bar/", then we can answer "true" for "foo/". A more complex + // case is if a pattern "/foo/" is followed "!/foo/bar/", then we + // can say "false" for "foo/" and "true" for "foo/baz/". + if line.is_negative { + return false; + } + if line.matches(dir) { + return true; + } + } + false + } } #[cfg(test)] @@ -214,6 +234,11 @@ mod tests { file.matches_file(path) } + fn matches_all_files_in(input: &[u8], path: &str) -> bool { + let file = GitIgnoreFile::empty().chain("", input).ok().unwrap(); + file.matches_all_files_in(path) + } + #[test] fn test_gitignore_empty_file() { let file = GitIgnoreFile::empty(); @@ -410,4 +435,14 @@ mod tests { assert!(file3.matches_file("foo/bar/baz")); assert!(!file3.matches_file("foo/bar/qux")); } + + #[test] + fn test_gitignore_match_dir() { + assert!(matches_all_files_in(b"foo\n", "foo/")); + assert!(matches_all_files_in(b"foo\nbar\n", "foo/")); + assert!(matches_all_files_in(b"!foo\nbar\n", "bar/")); + assert!(!matches_all_files_in(b"foo\n!bar\n", "foo/")); + // This one could return true, but it doesn't currently + assert!(!matches_all_files_in(b"foo\n!/bar\n", "foo/")); + } } diff --git a/lib/src/working_copy.rs b/lib/src/working_copy.rs index 2504ae20d..bb3d4f3e5 100644 --- a/lib/src/working_copy.rs +++ b/lib/src/working_copy.rs @@ -343,8 +343,10 @@ impl TreeState { } if file_type.is_dir() { let subdir = dir.join(&DirRepoPathComponent::from(name)); - let disk_subdir = disk_dir.join(file_name); - work.push((subdir, disk_subdir, git_ignore.clone())); + if !git_ignore.matches_all_files_in(&subdir.to_internal_string()) { + let disk_subdir = disk_dir.join(file_name); + work.push((subdir, disk_subdir, git_ignore.clone())); + } } else { let file = dir.join(&FileRepoPathComponent::from(name)); let disk_file = disk_dir.join(file_name);