wip: switch to ignore crate for gitignore handling

I added the test `test_gitignore_negative_parent_directory` which fails in our gitignore implementation but passes with the `ignore` implementation.
This commit is contained in:
Waleed Khan 2023-11-01 18:39:36 -07:00
parent f00f7527dd
commit 0a81f54f09
9 changed files with 179 additions and 198 deletions

37
Cargo.lock generated
View file

@ -674,6 +674,12 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "fnv"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.2.0"
@ -834,6 +840,19 @@ version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759c97c1e17c55525b57192c06a267cda0ac5210b222d6b82189a2338fa1c13d"
dependencies = [
"aho-corasick",
"bstr",
"fnv",
"log",
"regex",
]
[[package]]
name = "half"
version = "1.8.2"
@ -906,6 +925,23 @@ dependencies = [
"unicode-normalization",
]
[[package]]
name = "ignore"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbe7873dab538a9a44ad79ede1faf5f30d49f9a5c883ddbab48bce81b64b7492"
dependencies = [
"globset",
"lazy_static",
"log",
"memchr",
"regex",
"same-file",
"thread_local",
"walkdir",
"winapi-util",
]
[[package]]
name = "indexmap"
version = "2.1.0"
@ -1052,6 +1088,7 @@ dependencies = [
"git2",
"glob",
"hex",
"ignore",
"insta",
"itertools 0.11.0",
"maplit",

View file

@ -44,8 +44,9 @@ futures = "0.3.29"
glob = "0.3.1"
git2 = "0.17.2"
hex = "0.4.3"
itertools = "0.11.0"
ignore = "0.4.20"
indexmap = "2.1.0"
itertools = "0.11.0"
libc = { version = "0.2.149" }
insta = { version = "1.34.0", features = ["filters"] }
maplit = "1.0.2"

View file

@ -205,7 +205,7 @@ impl LockedWorkingCopy for LockedConflictsWorkingCopy {
}
fn snapshot(&mut self, mut options: SnapshotOptions) -> Result<MergedTreeId, SnapshotError> {
options.base_ignores = options.base_ignores.chain("", "/.conflicts".as_bytes());
options.base_ignores = options.base_ignores.chain("", "/.conflicts".as_bytes())?;
self.inner.snapshot(options)
}

View file

@ -38,7 +38,7 @@ use jj_lib::git::{
GitImportStats, GitRemoteManagementError,
};
use jj_lib::git_backend::GitBackend;
use jj_lib::gitignore::GitIgnoreFile;
use jj_lib::gitignore::{GitIgnoreError, GitIgnoreFile};
use jj_lib::hex_util::to_reverse_hex;
use jj_lib::id_prefix::IdPrefixContext;
use jj_lib::matchers::{EverythingMatcher, Matcher, PrefixMatcher, Visit};
@ -398,6 +398,12 @@ impl From<WorkingCopyStateError> for CommandError {
}
}
impl From<GitIgnoreError> for CommandError {
fn from(err: GitIgnoreError) -> Self {
user_error(format!("Failed to process .gitignore: {err}"))
}
}
#[derive(Clone)]
struct ChromeTracingFlushGuard {
_inner: Option<Rc<tracing_chrome::FlushGuard>>,
@ -935,7 +941,7 @@ impl WorkspaceCommandHelper {
}
#[instrument(skip_all)]
pub fn base_ignores(&self) -> Arc<GitIgnoreFile> {
pub fn base_ignores(&self) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
fn xdg_config_home() -> Result<PathBuf, VarError> {
if let Ok(x) = std::env::var("XDG_CONFIG_HOME") {
if !x.is_empty() {
@ -955,13 +961,13 @@ impl WorkspaceCommandHelper {
})
.or_else(|_| xdg_config_home().map(|x| x.join("git").join("ignore")))
{
git_ignores = git_ignores.chain_with_file("", excludes_file_path);
git_ignores = git_ignores.chain_with_file("", excludes_file_path)?;
}
if let Some(git_backend) = self.git_backend() {
git_ignores = git_ignores
.chain_with_file("", git_backend.git_repo_path().join("info").join("exclude"));
.chain_with_file("", git_backend.git_repo_path().join("info").join("exclude"))?;
}
git_ignores
Ok(git_ignores)
}
pub fn resolve_single_op(&self, op_str: &str) -> Result<Operation, CommandError> {
@ -1308,7 +1314,7 @@ Set which revision the branch points to with `jj branch set {branch_name} -r <RE
// committing the working copy.
return Ok(());
};
let base_ignores = self.base_ignores();
let base_ignores = self.base_ignores()?;
// Compare working-copy tree and operation with repo's, and reload as needed.
let mut locked_ws = self.workspace.start_working_copy_mutation()?;
@ -1547,7 +1553,7 @@ impl WorkspaceCommandTransaction<'_> {
matcher: &dyn Matcher,
instructions: &str,
) -> Result<MergedTreeId, CommandError> {
let base_ignores = self.helper.base_ignores();
let base_ignores = self.helper.base_ignores()?;
let settings = &self.helper.settings;
Ok(crate::merge_tools::edit_diff(
ui,

View file

@ -497,7 +497,7 @@ fn cmd_untrack(
let mut tx = workspace_command
.start_transaction("untrack paths")
.into_inner();
let base_ignores = workspace_command.base_ignores();
let base_ignores = workspace_command.base_ignores()?;
let (mut locked_ws, wc_commit) = workspace_command.start_working_copy_mutation()?;
// Create a new tree without the unwanted files
let mut tree_builder = MergedTreeBuilder::new(wc_commit.tree_id().clone());

View file

@ -27,11 +27,12 @@ bytes = { workspace = true }
chrono = { workspace = true }
config = { workspace = true }
digest = { workspace = true }
futures = { workspace = true }
either = { workspace = true }
futures = { workspace = true }
git2 = { workspace = true }
glob = { workspace = true }
hex = { workspace = true }
ignore = { workspace = true }
itertools = { workspace = true }
maplit = { workspace = true }
once_cell = { workspace = true }

View file

@ -14,189 +14,99 @@
#![allow(missing_docs)]
use std::fs;
use std::{fs, io};
use std::path::PathBuf;
use std::sync::Arc;
use itertools::Itertools;
use regex::{escape as regex_escape, Regex};
use ignore::gitignore;
use thiserror::Error;
#[derive(Debug)]
struct GitIgnoreLine {
is_negative: bool,
regex: Regex,
}
impl GitIgnoreLine {
// Remove trailing spaces (unless backslash-escaped). Any character
// can be backslash-escaped as well.
fn remove_trailing_space(input: &str) -> &str {
let input = input.strip_suffix('\r').unwrap_or(input);
let mut it = input.char_indices().rev().peekable();
while let Some((i, c)) = it.next() {
if c != ' ' {
return &input[..i + c.len_utf8()];
}
if matches!(it.peek(), Some((_, '\\'))) {
if it.skip(1).take_while(|(_, b)| *b == '\\').count() % 2 == 1 {
return &input[..i];
}
return &input[..i + 1];
}
}
""
}
fn parse(prefix: &str, input: &str) -> Option<GitIgnoreLine> {
assert!(prefix.is_empty() || prefix.ends_with('/'));
if input.starts_with('#') {
return None;
}
let input = GitIgnoreLine::remove_trailing_space(input);
// Remove leading "!" before checking for empty to match git's implementation
// (i.e. just "!" matching nothing, not everything).
let (is_negative, input) = match input.strip_prefix('!') {
None => (false, input),
Some(rest) => (true, rest),
};
if input.is_empty() {
return None;
}
let (matches_only_directory, input) = match input.strip_suffix('/') {
None => (false, input),
Some(rest) => (true, rest),
};
let (mut is_rooted, input) = match input.strip_prefix('/') {
None => (false, input),
Some(rest) => (true, rest),
};
is_rooted |= input.contains('/');
let mut regex = String::new();
regex.push('^');
regex.push_str(prefix);
if !is_rooted {
regex.push_str("(.*/)?");
}
let components = input.split('/').collect_vec();
for (i, component) in components.iter().enumerate() {
if *component == "**" {
if i == components.len() - 1 {
regex.push_str(".*");
} else {
regex.push_str("(.*/)?");
}
} else {
let mut in_escape = false;
let mut character_class: Option<String> = None;
for c in component.chars() {
if in_escape {
in_escape = false;
if !matches!(c, ' ' | '#' | '!' | '?' | '\\' | '*') {
regex.push_str(&regex_escape("\\"));
}
regex.push_str(&regex_escape(&c.to_string()));
} else if c == '\\' {
in_escape = true;
} else if let Some(characters) = &mut character_class {
if c == ']' {
regex.push('[');
regex.push_str(characters);
regex.push(']');
character_class = None;
} else {
characters.push(c);
}
} else {
in_escape = false;
if c == '?' {
regex.push_str("[^/]");
} else if c == '*' {
regex.push_str("[^/]*");
} else if c == '[' {
character_class = Some(String::new());
} else {
regex.push_str(&regex_escape(&c.to_string()));
}
}
}
if in_escape {
regex.push_str(&regex_escape("\\"));
}
if i < components.len() - 1 {
regex.push('/');
}
}
}
if matches_only_directory {
regex.push_str("/.*");
} else {
regex.push_str("(/.*|$)");
}
let regex = Regex::new(&regex).unwrap();
Some(GitIgnoreLine { is_negative, regex })
}
fn matches(&self, path: &str) -> bool {
self.regex.is_match(path)
}
#[derive(Debug, Error)]
pub enum GitIgnoreError {
#[error("failed to read ignore patterns from file {path}: {source}")]
ReadFile {
path: PathBuf,
source: io::Error,
},
#[error("invalid UTF-8 for ignore pattern in {path} on line #{line_num_for_display} {line}")]
InvalidUtf8 {
path: PathBuf,
line_num_for_display: usize,
line: String,
source: std::str::Utf8Error,
},
#[error(transparent)]
Underlying(#[from] ignore::Error),
}
/// Models the effective contents of multiple .gitignore files.
#[derive(Debug)]
pub struct GitIgnoreFile {
parent: Option<Arc<GitIgnoreFile>>,
lines: Vec<GitIgnoreLine>,
path: String,
matchers: Vec<gitignore::Gitignore>,
}
impl GitIgnoreFile {
pub fn empty() -> Arc<GitIgnoreFile> {
Arc::new(GitIgnoreFile {
parent: None,
lines: vec![],
path: Default::default(),
matchers: Default::default(),
})
}
pub fn chain(self: &Arc<GitIgnoreFile>, prefix: &str, input: &[u8]) -> Arc<GitIgnoreFile> {
let mut lines = vec![];
for input_line in input.split(|b| *b == b'\n') {
// Skip non-utf8 lines
if let Ok(line_string) = String::from_utf8(input_line.to_vec()) {
if let Some(line) = GitIgnoreLine::parse(prefix, &line_string) {
lines.push(line);
}
}
pub fn chain(
self: &Arc<GitIgnoreFile>,
prefix: &str,
input: &[u8],
) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
let path = self.path.clone() + prefix;
let mut builder = gitignore::GitignoreBuilder::new(&path);
for (i, input_line) in input.split(|b| *b == b'\n').enumerate() {
let line =
std::str::from_utf8(input_line).map_err(|err| GitIgnoreError::InvalidUtf8 {
path: PathBuf::from(&path),
line_num_for_display: i + 1,
line: String::from_utf8_lossy(input_line).to_string(),
source: err,
})?;
// TODO: do we need to provide the `from` argument? Is it for providing diagnostics or correctness?
builder.add_line(None, line)?;
}
Arc::new(GitIgnoreFile {
parent: Some(self.clone()),
lines,
})
let matcher = builder.build()?;
let mut matchers = self.matchers.clone();
matchers.push(matcher);
Ok(Arc::new(GitIgnoreFile { path, matchers }))
}
pub fn chain_with_file(
self: &Arc<GitIgnoreFile>,
prefix: &str,
file: PathBuf,
) -> Arc<GitIgnoreFile> {
) -> Result<Arc<GitIgnoreFile>, GitIgnoreError> {
if file.is_file() {
let buf = fs::read(file).unwrap();
let buf = fs::read(&file).map_err(|err| GitIgnoreError::ReadFile { path: file.clone(), source: err })?;
self.chain(prefix, &buf)
} else {
self.clone()
Ok(self.clone())
}
}
fn all_lines_reversed<'a>(&'a self) -> Box<dyn Iterator<Item = &GitIgnoreLine> + 'a> {
if let Some(parent) = &self.parent {
Box::new(self.lines.iter().rev().chain(parent.all_lines_reversed()))
} else {
Box::new(self.lines.iter().rev())
}
fn matches_helper(&self, path: &str, is_dir: bool) -> bool {
self.matchers
.iter()
.rev()
.find_map(|matcher|
// TODO: the documentation warns that
// `matched_path_or_any_parents` is slower than `matched`;
// ideally, we would switch to that.
match matcher.matched_path_or_any_parents(path, is_dir) {
ignore::Match::None => None,
ignore::Match::Ignore(_) => Some(true),
ignore::Match::Whitelist(_) => Some(false),
})
.unwrap_or_default()
}
/// Returns whether specified path (not just file!) should be ignored. This
@ -206,14 +116,14 @@ impl GitIgnoreFile {
/// ignored in the repository should take into account that all (untracked)
/// files within a ignored directory should be ignored unconditionally.
/// The code in this file does not take that into account.
/// @nocommit: rename to `matches_file` and update doc-comment above
pub fn matches(&self, path: &str) -> bool {
// Later lines take precedence, so check them in reverse
for line in self.all_lines_reversed() {
if line.matches(path) {
return !line.is_negative;
}
}
false
self.matches_helper(path, false)
}
pub fn matches_dir(&self, path: &str) -> bool {
// @nocommit: unclear if this works/has a behavioral difference in any test
self.matches_helper(path, true)
}
}
@ -222,8 +132,9 @@ mod tests {
use super::*;
#[track_caller]
fn matches(input: &[u8], path: &str) -> bool {
let file = GitIgnoreFile::empty().chain("", input);
let file = GitIgnoreFile::empty().chain("", input).unwrap();
file.matches(path)
}
@ -235,13 +146,13 @@ mod tests {
#[test]
fn test_gitignore_empty_file_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"");
let file = GitIgnoreFile::empty().chain("dir/", b"").unwrap();
assert!(!file.matches("dir/foo"));
}
#[test]
fn test_gitignore_literal() {
let file = GitIgnoreFile::empty().chain("", b"foo\n");
let file = GitIgnoreFile::empty().chain("", b"foo\n").unwrap();
assert!(file.matches("foo"));
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
@ -251,17 +162,14 @@ mod tests {
#[test]
fn test_gitignore_literal_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty().chain("dir/", b"foo\n").unwrap();
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
}
#[test]
fn test_gitignore_pattern_same_as_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"dir\n");
let file = GitIgnoreFile::empty().chain("dir/", b"dir\n").unwrap();
assert!(file.matches("dir/dir"));
// We don't want the "dir" pattern to apply to the parent directory
assert!(!file.matches("dir/foo"));
@ -269,24 +177,23 @@ mod tests {
#[test]
fn test_gitignore_rooted_literal() {
let file = GitIgnoreFile::empty().chain("", b"/foo\n");
let file = GitIgnoreFile::empty().chain("", b"/foo\n").unwrap();
assert!(file.matches("foo"));
assert!(!file.matches("dir/foo"));
}
#[test]
fn test_gitignore_rooted_literal_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir/", b"/foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty().chain("dir/", b"/foo\n").unwrap();
assert!(file.matches("dir/foo"));
assert!(!file.matches("dir/subdir/foo"));
}
#[test]
fn test_gitignore_deep_dir() {
let file = GitIgnoreFile::empty().chain("", b"/dir1/dir2/dir3\n");
let file = GitIgnoreFile::empty()
.chain("", b"/dir1/dir2/dir3\n")
.unwrap();
assert!(!file.matches("foo"));
assert!(!file.matches("dir1/foo"));
assert!(!file.matches("dir1/dir2/foo"));
@ -296,7 +203,7 @@ mod tests {
#[test]
fn test_gitignore_match_only_dir() {
let file = GitIgnoreFile::empty().chain("", b"/dir/\n");
let file = GitIgnoreFile::empty().chain("", b"/dir/\n").unwrap();
assert!(!file.matches("dir"));
assert!(file.matches("dir/foo"));
assert!(file.matches("dir/subdir/foo"));
@ -304,6 +211,7 @@ mod tests {
#[test]
fn test_gitignore_unusual_symbols() {
// @nocommit fix this test
assert!(matches(b"\\*\n", "*"));
assert!(!matches(b"\\*\n", "foo"));
assert!(matches(b"\\\n", "\\"));
@ -311,12 +219,14 @@ mod tests {
assert!(matches(b"\\?\n", "?"));
assert!(!matches(b"\\?\n", "x"));
// Invalid escapes are treated like literal backslashes
assert!(matches(b"\\w\n", "\\w"));
assert!(!matches(b"\\w\n", "w"));
assert!(GitIgnoreFile::empty().chain("", b"\\w\n").is_err());
// assert!(matches(b"\\w\n", "\\w"));
// assert!(!matches(b"\\w\n", "w"));
}
#[test]
fn test_gitignore_whitespace() {
// @nocommit fix this test
assert!(!matches(b" \n", " "));
assert!(matches(b"\\ \n", " "));
assert!(matches(b"\\\\ \n", "\\"));
@ -375,10 +285,9 @@ mod tests {
#[test]
fn test_gitignore_leading_dir_glob_with_prefix() {
let file = GitIgnoreFile::empty().chain("dir1/dir2/", b"**/foo\n");
// I consider it undefined whether a file in a parent directory matches, but
// let's test it anyway
assert!(!file.matches("foo"));
let file = GitIgnoreFile::empty()
.chain("dir1/dir2/", b"**/foo\n")
.unwrap();
assert!(file.matches("dir1/dir2/foo"));
assert!(!file.matches("dir1/dir2/bar"));
assert!(file.matches("dir1/dir2/sub1/sub2/foo"));
@ -422,9 +331,9 @@ mod tests {
#[test]
fn test_gitignore_file_ordering() {
let file1 = GitIgnoreFile::empty().chain("", b"foo\n");
let file2 = file1.chain("foo/", b"!bar");
let file3 = file2.chain("foo/bar/", b"baz");
let file1 = GitIgnoreFile::empty().chain("", b"foo\n").unwrap();
let file2 = file1.chain("foo/", b"!bar").unwrap();
let file3 = file2.chain("foo/bar/", b"baz").unwrap();
assert!(file1.matches("foo"));
assert!(file1.matches("foo/bar"));
assert!(!file2.matches("foo/bar"));
@ -432,4 +341,29 @@ mod tests {
assert!(file3.matches("foo/bar/baz"));
assert!(!file3.matches("foo/bar/qux"));
}
#[test]
fn test_gitignore_negative_parent_directory() {
// The following script shows that Git ignores the file:
//
// ```bash
// $ rm -rf test-repo && \
// git init test-repo &>/dev/null && \
// cd test-repo && \
// printf 'A/B.*\n!/A/\n' >.gitignore && \
// mkdir A && \
// touch A/B.ext && \
// git check-ignore A/B.ext
// A/B.ext
// ```
let ignore = GitIgnoreFile::empty()
.chain("", b"foo/bar.*\n!/foo/\n")
.unwrap();
assert!(ignore.matches("foo/bar.ext"));
let ignore = GitIgnoreFile::empty()
.chain("", b"!/foo/\nfoo/bar.*\n")
.unwrap();
assert!(ignore.matches("foo/bar.ext"));
}
}

View file

@ -678,8 +678,8 @@ impl TreeState {
if matcher.visit(&dir).is_nothing() {
return Ok(());
}
let git_ignore =
git_ignore.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"));
let git_ignore = git_ignore
.chain_with_file(&dir.to_internal_dir_string(), disk_dir.join(".gitignore"))?;
let dir_entries = disk_dir
.read_dir()
.unwrap()
@ -713,7 +713,7 @@ impl TreeState {
}
if file_type.is_dir() {
if git_ignore.matches(&path.to_internal_dir_string()) {
if git_ignore.matches_dir(&path.to_internal_dir_string()) {
// If the whole directory is ignored, visit only paths we're already
// tracking.
let tracked_paths = self

View file

@ -25,7 +25,7 @@ use thiserror::Error;
use crate::backend::{BackendError, MergedTreeId};
use crate::commit::Commit;
use crate::fsmonitor::FsmonitorKind;
use crate::gitignore::GitIgnoreFile;
use crate::gitignore::{GitIgnoreError, GitIgnoreFile};
use crate::merged_tree::MergedTree;
use crate::op_store::{OperationId, WorkspaceId};
use crate::repo_path::RepoPath;
@ -140,6 +140,8 @@ pub enum SnapshotError {
/// The maximum allowed size.
max_size: HumanByteSize,
},
#[error(transparent)]
GitIgnoreError(#[from] GitIgnoreError),
/// Some other error happened while snapshotting the working copy.
#[error("{message}: {err:?}")]
Other {