ok/jj
1
0
Fork 0
forked from mirrors/jj

diff: make diff() function accept any number of inputs

So that more tests can leverage diff::diff() helper.

I also removed the fast path for identical inputs. This function is only used by
tests and benches, and production code usually compares content hashes first.
This commit is contained in:
Yuya Nishihara 2024-08-16 16:22:12 +09:00
parent aa0fbd9f3f
commit db0a4bccc5
2 changed files with 29 additions and 31 deletions

View file

@ -36,17 +36,17 @@ fn bench_diff_lines(c: &mut Criterion) {
group.bench_with_input(
BenchmarkId::new("unchanged", &label),
&unchanged_lines(count),
|b, (left, right)| b.iter(|| diff::diff(left.as_bytes(), right.as_bytes())),
|b, (left, right)| b.iter(|| diff::diff([left, right])),
);
group.bench_with_input(
BenchmarkId::new("modified", &label),
&modified_lines(count),
|b, (left, right)| b.iter(|| diff::diff(left.as_bytes(), right.as_bytes())),
|b, (left, right)| b.iter(|| diff::diff([left, right])),
);
group.bench_with_input(
BenchmarkId::new("reversed", &label),
&reversed_lines(count),
|b, (left, right)| b.iter(|| diff::diff(left.as_bytes(), right.as_bytes())),
|b, (left, right)| b.iter(|| diff::diff([left, right])),
);
}
}
@ -54,8 +54,8 @@ fn bench_diff_lines(c: &mut Criterion) {
fn bench_diff_git_git_read_tree_c(c: &mut Criterion) {
c.bench_function("bench_diff_git_git_read_tree_c", |b| {
b.iter(|| {
diff::diff(
br##"/*
diff::diff([
r##"/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
@ -104,7 +104,7 @@ int main(int argc, char **argv)
return 0;
}
"##,
br##"/*
r##"/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
@ -193,7 +193,7 @@ int main(int argc, char **argv)
return 0;
}
"##,
)
])
})
});
}

View file

@ -666,19 +666,15 @@ impl<'diff, 'input> Iterator for DiffHunkIterator<'diff, 'input> {
}
}
/// Diffs two slices of bytes. The returned diff hunks may be any length (may
/// Diffs slices of bytes. The returned diff hunks may be any length (may
/// span many lines or may be only part of a line). This currently uses
/// Histogram diff (or maybe something similar; I'm not sure I understood the
/// algorithm correctly). It first diffs lines in the input and then refines
/// the changed ranges at the word level.
pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> Vec<DiffHunk<'a>> {
if left == right {
return vec![DiffHunk::matching(left)];
}
Diff::default_refinement([left, right])
.hunks()
.collect_vec()
pub fn diff<'a, T: AsRef<[u8]> + ?Sized + 'a>(
inputs: impl IntoIterator<Item = &'a T>,
) -> Vec<DiffHunk<'a>> {
Diff::default_refinement(inputs).hunks().collect()
}
#[cfg(test)]
@ -1091,7 +1087,7 @@ mod tests {
#[test]
fn test_diff_nothing_in_common() {
assert_eq!(
diff(b"aaa", b"bb"),
diff(["aaa", "bb"]),
vec![DiffHunk::different(["aaa", "bb"])]
);
}
@ -1099,7 +1095,7 @@ mod tests {
#[test]
fn test_diff_insert_in_middle() {
assert_eq!(
diff(b"a z", b"a S z"),
diff(["a z", "a S z"]),
vec![
DiffHunk::matching("a "),
DiffHunk::different(["", "S "]),
@ -1111,7 +1107,7 @@ mod tests {
#[test]
fn test_diff_no_unique_middle_flips() {
assert_eq!(
diff(b"a R R S S z", b"a S S R R z"),
diff(["a R R S S z", "a S S R R z"]),
vec![
DiffHunk::matching("a "),
DiffHunk::different(["R R ", ""]),
@ -1125,10 +1121,10 @@ mod tests {
#[test]
fn test_diff_recursion_needed() {
assert_eq!(
diff(
b"a q x q y q z q b q y q x q c",
b"a r r x q y z q b y q x r r c",
),
diff([
"a q x q y q z q b q y q x q c",
"a r r x q y z q b y q x r r c",
]),
vec![
DiffHunk::matching("a "),
DiffHunk::different(["q", "r"]),
@ -1154,10 +1150,12 @@ mod tests {
// and "formatter", the region at the end has the unique words "write_fmt"
// and "fmt", but we forgot to recurse into that region, so we ended up
// saying that "write_fmt(fmt).unwrap()" was replaced by b"write_fmt(fmt)".
assert_eq!(diff(
b" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) {\n self.styler().write_fmt(fmt).unwrap()\n",
b" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) -> io::Result<()> {\n self.styler().write_fmt(fmt)\n"
),
#[rustfmt::skip]
assert_eq!(
diff([
" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) {\n self.styler().write_fmt(fmt).unwrap()\n",
" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) -> io::Result<()> {\n self.styler().write_fmt(fmt)\n"
]),
vec![
DiffHunk::matching(" pub fn write_fmt(&mut self, fmt: fmt::Arguments<\'_>) "),
DiffHunk::different(["", "-> io::Result<()> "]),
@ -1173,8 +1171,8 @@ mod tests {
// This is the diff from commit e497ea2a9b in the git.git repo
#[rustfmt::skip]
assert_eq!(
diff(
br##"/*
diff([
r##"/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
@ -1223,7 +1221,7 @@ int main(int argc, char **argv)
return 0;
}
"##,
br##"/*
r##"/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
@ -1312,7 +1310,7 @@ int main(int argc, char **argv)
return 0;
}
"##,
),
]),
vec![
DiffHunk::matching("/*\n * GIT - The information manager from hell\n *\n * Copyright (C) Linus Torvalds, 2005\n */\n#include \"#cache.h\"\n\n"),
DiffHunk::different(["", "static void create_directories(const char *path)\n{\n\tint len = strlen(path);\n\tchar *buf = malloc(len + 1);\n\tconst char *slash = path;\n\n\twhile ((slash = strchr(slash+1, \'/\')) != NULL) {\n\t\tlen = slash - path;\n\t\tmemcpy(buf, path, len);\n\t\tbuf[len] = 0;\n\t\tmkdir(buf, 0700);\n\t}\n}\n\nstatic int create_file(const char *path)\n{\n\tint fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\tif (fd < 0) {\n\t\tif (errno == ENOENT) {\n\t\t\tcreate_directories(path);\n\t\t\tfd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0600);\n\t\t}\n\t}\n\treturn fd;\n}\n\n"]),