Switch to ignore crate for gitignore handling.

Co-authored-by: Waleed Khan <me@waleedkhan.name>
This commit is contained in:
Daehyeok Mun 2024-02-16 09:54:09 -08:00 committed by Daehyeok Mun
parent 965d6ce4e4
commit a9f489ccdf
9 changed files with 201 additions and 203 deletions

30
Cargo.lock generated
View file

@ -1436,6 +1436,19 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57da3b9b5b85bd66f31093f8c408b90a74431672542466497dcbdfdc02034be1"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata 0.4.4",
"regex-syntax 0.8.2",
]
[[package]]
name = "half"
version = "1.8.2"
@ -1508,6 +1521,22 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "ignore"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b46810df39e66e925525d6e38ce1e7f6e1d208f72dc39757880fcb66e2c58af1"
dependencies = [
"crossbeam-deque",
"globset",
"log",
"memchr",
"regex-automata 0.4.4",
"same-file",
"walkdir",
"winapi-util",
]
[[package]]
name = "indexmap"
version = "2.2.3"
@ -1674,6 +1703,7 @@ dependencies = [
"gix",
"glob",
"hex",
"ignore",
"insta",
"itertools 0.12.1",
"jj-lib-proc-macros",

View file

@ -54,6 +54,7 @@ gix = { version = "0.58.0", default-features = false, features = [
] }
glob = "0.3.1"
hex = "0.4.3"
ignore = "0.4.20"
indexmap = "2.2.3"
insta = { version = "1.34.0", features = ["filters"] }
itertools = "0.12.1"

View file

@ -222,7 +222,7 @@ impl LockedWorkingCopy for LockedConflictsWorkingCopy {
}
fn snapshot(&mut self, mut options: SnapshotOptions) -> Result<MergedTreeId, SnapshotError> {
options.base_ignores = options.base_ignores.chain("", "/.conflicts".as_bytes());
options.base_ignores = options.base_ignores.chain("", "/.conflicts".as_bytes())?;
self.inner.snapshot(options)
}

View file

@ -35,7 +35,7 @@ use jj_lib::backend::{BackendError, ChangeId, CommitId, MergedTreeId};
use jj_lib::commit::Commit;
use jj_lib::git::{GitConfigParseError, GitExportError, GitImportError, GitRemoteManagementError};
use jj_lib::git_backend::GitBackend;
use jj_lib::gitignore::GitIgnoreFile;
use jj_lib::gitignore::{GitIgnoreError, GitIgnoreFile};
use jj_lib::hex_util::to_reverse_hex;
use jj_lib::id_prefix::IdPrefixContext;
use jj_lib::matchers::{EverythingMatcher, Matcher, PrefixMatcher};
@ -481,6 +481,12 @@ impl From<WorkingCopyStateError> for CommandError {
}
}
impl From<GitIgnoreError> for CommandError {
fn from(err: GitIgnoreError) -> Self {
user_error_with_message("Failed to process .gitignore.", err)
}
}
#[derive(Clone)]
struct ChromeTracingFlushGuard {
_inner: Option<Rc<tracing_chrome::FlushGuard>>,
@ -1047,7 +1053,7 @@ impl WorkspaceCommandHelper {
}
#[instrument(skip_all)]
pub fn base_ignores(&self) -> Arc<GitIgnoreFile> {
pub fn base_ignores(&self) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
fn get_excludes_file_path(config: &gix::config::File) -> Option<PathBuf> {
// TODO: maybe use path_by_key() and interpolate(), which can process non-utf-8
// path on Unix.
@ -1073,16 +1079,16 @@ impl WorkspaceCommandHelper {
if let Some(git_backend) = self.git_backend() {
let git_repo = git_backend.git_repo();
if let Some(excludes_file_path) = get_excludes_file_path(&git_repo.config_snapshot()) {
git_ignores = git_ignores.chain_with_file("", excludes_file_path);
git_ignores = git_ignores.chain_with_file("", excludes_file_path)?;
}
git_ignores = git_ignores
.chain_with_file("", git_backend.git_repo_path().join("info").join("exclude"));
.chain_with_file("", git_backend.git_repo_path().join("info").join("exclude"))?;
} else if let Ok(git_config) = gix::config::File::from_globals() {
if let Some(excludes_file_path) = get_excludes_file_path(&git_config) {
git_ignores = git_ignores.chain_with_file("", excludes_file_path);
git_ignores = git_ignores.chain_with_file("", excludes_file_path)?;
}
}
git_ignores
Ok(git_ignores)
}
pub fn resolve_single_op(&self, op_str: &str) -> Result<Operation, OpsetEvaluationError> {
@ -1353,7 +1359,7 @@ Set which revision the branch points to with `jj branch set {branch_name} -r <RE
// committing the working copy.
return Ok(());
};
let base_ignores = self.base_ignores();
let base_ignores = self.base_ignores()?;
// Compare working-copy tree and operation with repo's, and reload as needed.
let mut locked_ws = self.workspace.start_working_copy_mutation()?;
@ -1730,7 +1736,7 @@ impl WorkspaceCommandTransaction<'_> {
matcher: &dyn Matcher,
instructions: &str,
) -> Result<MergedTreeId, CommandError> {
let base_ignores = self.helper.base_ignores();
let base_ignores = self.helper.base_ignores()?;
let settings = &self.helper.settings;
Ok(crate::merge_tools::edit_diff(
ui,

View file

@ -46,7 +46,7 @@ pub(crate) fn cmd_untrack(
let matcher = workspace_command.matcher_from_values(&args.paths)?;
let mut tx = workspace_command.start_transaction().into_inner();
let base_ignores = workspace_command.base_ignores();
let base_ignores = workspace_command.base_ignores()?;
let (mut locked_ws, wc_commit) = workspace_command.start_working_copy_mutation()?;
// Create a new tree without the unwanted files
let mut tree_builder = MergedTreeBuilder::new(wc_commit.tree_id().clone());

View file

@ -36,6 +36,7 @@ git2 = { workspace = true }
gix = { workspace = true }
glob = { workspace = true }
hex = { workspace = true }
ignore = { workspace = true }
itertools = { workspace = true }
jj-lib-proc-macros = { workspace = true }
maplit = { workspace = true }

View file

@ -14,189 +14,99 @@
#![allow(missing_docs)]
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use std::{fs, io};
use itertools::Itertools;
use regex::{escape as regex_escape, Regex};
use ignore::gitignore;
use thiserror::Error;
#[derive(Debug)]
struct GitIgnoreLine {
is_negative: bool,
regex: Regex,
}
impl GitIgnoreLine {
// Remove trailing spaces (unless backslash-escaped). Any character
// can be backslash-escaped as well.
fn remove_trailing_space(input: &str) -> &str {
let input = input.strip_suffix('\r').unwrap_or(input);
let mut it = input.char_indices().rev().peekable();
while let Some((i, c)) = it.next() {
if c != ' ' {
return &input[..i + c.len_utf8()];
}
if matches!(it.peek(), Some((_, '\\'))) {
if it.skip(1).take_while(|(_, b)| *b == '\\').count() % 2 == 1 {
return &input[..i];
}
return &input[..i + 1];
}
}
""
}
fn parse(prefix: &str, input: &str) -> Option<GitIgnoreLine> {
assert!(prefix.is_empty() || prefix.ends_with('/'));
if input.starts_with('#') {
return None;
}
let input = GitIgnoreLine::remove_trailing_space(input);
// Remove leading "!" before checking for empty to match git's implementation
// (i.e. just "!" matching nothing, not everything).
let (is_negative, input) = match input.strip_prefix('!') {
None => (false, input),
Some(rest) => (true, rest),
};
if input.is_empty() {
return None;
}
let (matches_only_directory, input) = match input.strip_suffix('/') {
None => (false, input),
Some(rest) => (true, rest),
};
let (mut is_rooted, input) = match input.strip_prefix('/') {
None => (false, input),
Some(rest) => (true, rest),
};
is_rooted |= input.contains('/');
let mut regex = String::new();
regex.push('^');
regex.push_str(prefix);
if !is_rooted {
regex.push_str("(.*/)?");
}
let components = input.split('/').collect_vec();
for (i, component) in components.iter().enumerate() {
if *component == "**" {
if i == components.len() - 1 {
regex.push_str(".*");
} else {
regex.push_str("(.*/)?");
}
} else {
let mut in_escape = false;
let mut character_class: Option<String> = None;
for c in component.chars() {
if in_escape {
in_escape = false;
if !matches!(c, ' ' | '#' | '!' | '?' | '\\' | '*') {
regex.push_str(&regex_escape("\\"));
}
regex.push_str(&regex_escape(&c.to_string()));
} else if c == '\\' {
in_escape = true;
} else if let Some(characters) = &mut character_class {
if c == ']' {
regex.push('[');
regex.push_str(characters);
regex.push(']');
character_class = None;
} else {
characters.push(c);
}
} else {
in_escape = false;
if c == '?' {
regex.push_str("[^/]");
} else if c == '*' {
regex.push_str("[^/]*");
} else if c == '[' {
character_class = Some(String::new());
} else {
regex.push_str(&regex_escape(&c.to_string()));
}
}
}
if in_escape {
regex.push_str(&regex_escape("\\"));
}
if i < components.len() - 1 {
regex.push('/');
}
}
}
if matches_only_directory {
regex.push_str("/.*");
} else {
regex.push_str("(/.*|$)");
}
let regex = Regex::new(&regex).unwrap();
Some(GitIgnoreLine { is_negative, regex })
}
fn matches(&self, path: &str) -> bool {
self.regex.is_match(path)
}
#[derive(Debug, Error)]
pub enum GitIgnoreError {
#[error("Failed to read ignore patterns from file {path}")]
ReadFile { path: PathBuf, source: io::Error },
#[error("invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display}: {line}")]
InvalidUtf8 {
path: PathBuf,
line_num_for_display: usize,
line: String,
source: std::str::Utf8Error,
},
#[error(transparent)]
Underlying(#[from] ignore::Error),
}
/// Models the effective contents of multiple .gitignore files.
#[derive(Debug)]
pub struct GitIgnoreFile {
parent: Option<Arc<GitIgnoreFile>>,
lines: Vec<GitIgnoreLine>,
path: String,
matchers: Vec<gitignore::Gitignore>,
}
impl GitIgnoreFile {
pub fn empty() -> Arc<GitIgnoreFile> {
Arc::new(GitIgnoreFile {
parent: None,
lines: vec![],
path: Default::default(),
matchers: Default::default(),
})
}
pub fn chain(self: &Arc<GitIgnoreFile>, prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
let mut lines = vec![];
for input_line in input.split(|b| *b == b'\n') {
// Skip non-utf8 lines
if let Ok(line_string) = String::from_utf8(input_line.to_vec()) {
if let Some(line) = GitIgnoreLine::parse(prefix, &line_string) {
lines.push(line);
}
}
pub fn chain(
self: &Arc<GitIgnoreFile>,
prefix: &str,
input: &[u8],
) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
let path = self.path.clone() + prefix;
let mut builder = gitignore::GitignoreBuilder::new(&path);
for (i, input_line) in input.split(|b| *b == b'\n').enumerate() {
let line =
std::str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 {
path: PathBuf::from(&path),
line_num_for_display: i + 1,
line: String::from_utf8_lossy(input_line).to_string(),
source: err,
})?;
// FIXME: do we need to provide the `from` argument? Is it for providing
// diagnostics or correctness?
builder.add_line(None, line)?;
}
let matcher = builder.build()?;
let mut matchers = self.matchers.clone();
matchers.push(matcher);
Arc::new(GitIgnoreFile {
parent: Some(self.clone()),
lines,
})
Ok(Arc::new(GitIgnoreFile { path, matchers }))
}
pub fn chain_with_file(
self: &Arc<GitIgnoreFile>,
prefix: &str,
file: PathBuf,
) -> Arc<GitIgnoreFile> {
) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
if file.is_file() {
let buf = fs::read(file).unwrap();
let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile {
path: file.clone(),
source: err,
})?;
self.chain(prefix, &buf)
} else {
self.clone()
Ok(self.clone())
}
}
fn all_lines_reversed<'a>(&'a self) -> Box<dyn Iterator<Item = &GitIgnoreLine> + 'a> {
if let Some(parent) = &self.parent {
Box::new(self.lines.iter().rev().chain(parent.all_lines_reversed()))
} else {
Box::new(self.lines.iter().rev())
}
fn matches_helper(&self, path: &str, is_dir: bool) -> bool {
self.matchers
.iter()
.rev()
.find_map(|matcher|
// TODO: the documentation warns that
// `matched_path_or_any_parents` is slower than `matched`;
// ideally, we would switch to that.
match matcher.matched_path_or_any_parents(path, is_dir) {
ignore::Match::None => None,
ignore::Match::Ignore(_) => Some(true),
ignore::Match::Whitelist(_) => Some(false),
})
.unwrap_or_default()
}
/// Returns whether specified path (not just file!) should be ignored. This
@ -207,13 +117,12 @@ impl GitIgnoreFile {
/// files within a ignored directory should be ignored unconditionally.
/// The code in this file does not take that into account.
pub fn matches(&self, path: &str) -> bool {
// Later lines take precedence, so check them in reverse
for line in self.all_lines_reversed() {
if line.matches(path) {
return !line.is_negative;
}
}
false
//If path ends with slash, consider it as a directory.
let (path, is_dir) = match path.strip_suffix('/') {
Some(path) => (path, true),
None => (path, false),
};
self.matches_helper(path, is_dir)
}
}
@ -223,7 +132,7 @@ mod tests {
use super::*;
fn matches(input: &[u8], path: &str) -> bool {
let file = GitIgnoreFile::empty().chain("", input);
let file = GitIgnoreFile::empty().chain("", input).unwrap();
file.matches(path)
}
@ -235,13 +144,13 @@ mod tests {
#[test]
fn test_gitignore_empty_file_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"");
let file = GitIgnoreFile::empty().chain("dir/", b"").unwrap();
assert!(!file.matches("dir/foo"));
}
#[test]
fn test_gitignore_literal() {
let file = GitIgnoreFile::empty().chain("", b"foo\n");
let file = GitIgnoreFile::empty().chain("", b"foo\n").unwrap();
assert!(file.matches("foo"));
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
@ -251,17 +160,14 @@ mod tests {
#[test]
fn test_gitignore_literal_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty().chain("./dir/", b"foo\n").unwrap();
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
}
#[test]
fn test_gitignore_pattern_same_as_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"dir\n");
let file = GitIgnoreFile::empty().chain("dir/", b"dir\n").unwrap();
assert!(file.matches("dir/dir"));
// We don't want the "dir" pattern to apply to the parent directory
assert!(!file.matches("dir/foo"));
@ -269,24 +175,23 @@ mod tests {
#[test]
fn test_gitignore_rooted_literal() {
let file = GitIgnoreFile::empty().chain("", b"/foo\n");
let file = GitIgnoreFile::empty().chain("", b"/foo\n").unwrap();
assert!(file.matches("foo"));
assert!(!file.matches("dir/foo"));
}
#[test]
fn test_gitignore_rooted_literal_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"/foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty().chain("dir/", b"/foo\n").unwrap();
assert!(file.matches("dir/foo"));
assert!(!file.matches("dir/subdir/foo"));
}
#[test]
fn test_gitignore_deep_dir() {
let file = GitIgnoreFile::empty().chain("", b"/dir1/dir2/dir3\n");
let file = GitIgnoreFile::empty()
.chain("", b"/dir1/dir2/dir3\n")
.unwrap();
assert!(!file.matches("foo"));
assert!(!file.matches("dir1/foo"));
assert!(!file.matches("dir1/dir2/foo"));
@ -296,7 +201,7 @@ mod tests {
#[test]
fn test_gitignore_match_only_dir() {
let file = GitIgnoreFile::empty().chain("", b"/dir/\n");
let file = GitIgnoreFile::empty().chain("", b"/dir/\n").unwrap();
assert!(!file.matches("dir"));
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
@ -306,37 +211,64 @@ mod tests {
fn test_gitignore_unusual_symbols() {
assert!(matches(b"\\*\n", "*"));
assert!(!matches(b"\\*\n", "foo"));
assert!(matches(b"\\\n", "\\"));
assert!(matches(b"\\!\n", "!"));
assert!(matches(b"\\?\n", "?"));
assert!(!matches(b"\\?\n", "x"));
assert!(matches(b"\\w\n", "w"));
assert!(GitIgnoreFile::empty().chain("", b"\\\n").is_err());
}
#[test]
#[cfg(not(target_os = "windows"))]
fn teest_gitignore_backslash_path() {
assert!(!matches(b"/foo/bar", "/foo\\bar"));
assert!(!matches(b"/foo/bar", "/foo/bar\\"));
assert!(!matches(b"/foo/bar/", "/foo\\bar/"));
assert!(!matches(b"/foo/bar/", "/foo\\bar\\/"));
// Invalid escapes are treated like literal backslashes
assert!(!matches(b"\\w\n", "\\w"));
assert!(matches(b"\\\\ \n", "\\ "));
assert!(matches(b"\\\\\\ \n", "\\ "));
}
#[test]
#[cfg(target_os = "windows")]
/// ignore crate consider backslashes as a directory divider only on
/// Windows.
fn teest_gitignore_backslash_path() {
assert!(matches(b"/foo/bar", "/foo\\bar"));
assert!(matches(b"/foo/bar", "/foo/bar\\"));
assert!(matches(b"/foo/bar/", "/foo\\bar/"));
assert!(matches(b"/foo/bar/", "/foo\\bar\\/"));
assert!(matches(b"\\w\n", "\\w"));
assert!(!matches(b"\\w\n", "w"));
assert!(!matches(b"\\\\ \n", "\\ "));
assert!(!matches(b"\\\\\\ \n", "\\ "));
}
#[test]
fn test_gitignore_whitespace() {
assert!(!matches(b" \n", " "));
assert!(matches(b"\\ \n", " "));
assert!(matches(b"\\\\ \n", "\\"));
assert!(!matches(b"\\\\ \n", " "));
assert!(matches(b"\\\\\\ \n", "\\ "));
assert!(matches(b" a\n", " a"));
assert!(matches(b"a b\n", "a b"));
assert!(matches(b"a b \n", "a b"));
assert!(!matches(b"a b \n", "a b "));
assert!(matches(b"a b\\ \\ \n", "a b "));
// It's unclear how this should be interpreted, but we count spaces before
// escaped spaces
assert!(matches(b"a b \\ \n", "a b "));
// A single CR at EOL is ignored
// Trail CRs at EOL is ignored
assert!(matches(b"a\r\n", "a"));
assert!(!matches(b"a\r\n", "a\r"));
assert!(matches(b"a\r\r\n", "a\r"));
assert!(!matches(b"a\r\r\n", "a\r"));
assert!(matches(b"a\r\r\n", "a"));
assert!(!matches(b"a\r\r\n", "a\r\r"));
assert!(matches(b"a\r\r\n", "a"));
assert!(matches(b"\ra\n", "\ra"));
assert!(!matches(b"\ra\n", "a"));
assert!(GitIgnoreFile::empty().chain("", b"a b \\ \n").is_err());
}
#[test]
@ -375,10 +307,9 @@ mod tests {
#[test]
fn test_gitignore_leading_dir_glob_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir1/dir2/", b"**/foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty()
.chain("dir1/dir2/", b"**/foo\n")
.unwrap();
assert!(file.matches("dir1/dir2/foo"));
assert!(!file.matches("dir1/dir2/bar"));
assert!(file.matches("dir1/dir2/sub1/sub2/foo"));
@ -418,13 +349,14 @@ mod tests {
assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar"));
assert!(matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/baz"));
assert!(!matches(b"foo\n!foo/bar\nfoo/bar/baz", "foo/bar/quux"));
assert!(!matches(b"foo/*\n!foo/bar", "foo/bar"));
}
#[test]
fn test_gitignore_file_ordering() {
let file1 = GitIgnoreFile::empty().chain("", b"foo\n");
let file2 = file1.chain("foo/", b"!bar");
let file3 = file2.chain("foo/bar/", b"baz");
let file1 = GitIgnoreFile::empty().chain("", b"foo\n").unwrap();
let file2 = file1.chain("foo/", b"!bar").unwrap();
let file3 = file2.chain("foo/bar/", b"baz").unwrap();
assert!(file1.matches("foo"));
assert!(file1.matches("foo/bar"));
assert!(!file2.matches("foo/bar"));
@ -432,4 +364,29 @@ mod tests {
assert!(file3.matches("foo/bar/baz"));
assert!(!file3.matches("foo/bar/qux"));
}
#[test]
fn test_gitignore_negative_parent_directory() {
// The following script shows that Git ignores the file:
//
// ```bash
// $ rm -rf test-repo && \
// git init test-repo &>/dev/null && \
// cd test-repo && \
// printf 'A/B.*\n!/A/\n' >.gitignore && \
// mkdir A && \
// touch A/B.ext && \
// git check-ignore A/B.ext
// A/B.ext
// ```
let ignore = GitIgnoreFile::empty()
.chain("", b"foo/bar.*\n!/foo/\n")
.unwrap();
assert!(ignore.matches("foo/bar.ext"));
let ignore = GitIgnoreFile::empty()
.chain("", b"!/foo/\nfoo/bar.*\n")
.unwrap();
assert!(ignore.matches("foo/bar.ext"));
}
}

View file

@ -868,8 +868,8 @@ impl TreeState {
return Ok(());
}
let git_ignore =
git_ignore.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"));
let git_ignore = git_ignore
.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
let dir_entries = disk_dir
.read_dir()
.unwrap()

View file

@ -25,7 +25,7 @@ use thiserror::Error;
use crate::backend::{BackendError, MergedTreeId};
use crate::commit::Commit;
use crate::fsmonitor::FsmonitorKind;
use crate::gitignore::GitIgnoreFile;
use crate::gitignore::{GitIgnoreError, GitIgnoreFile};
use crate::op_store::{OperationId, WorkspaceId};
use crate::repo_path::{RepoPath, RepoPathBuf};
use crate::settings::HumanByteSize;
@ -162,6 +162,9 @@ pub enum SnapshotError {
/// The maximum allowed size.
max_size: HumanByteSize,
},
/// Checking path with ignore patterns failed.
#[error(transparent)]
GitIgnoreError(#[from] GitIgnoreError),
/// Some other error happened while snapshotting the working copy.
#[error("{message}")]
Other {