From e52c902d3ad77e15bc32bc26503274f3b5ff9853 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Sat, 9 Oct 2021 23:41:17 -0700 Subject: [PATCH] diff: compact adjacent unchanged regions also when using `for_tokenizer()` I noticed while working on support for unified diffs (#33) that `Diff::for_tokenizer(..., &find_line_ranges)` would return a `DiffHunk::Matching` for each matching line instead of a single `DiffHunk::Matching` for all the matching lines. That's different from what you get from `Diff::default_refinement()` and seems less convenient to work with. --- lib/src/diff.rs | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/lib/src/diff.rs b/lib/src/diff.rs index 72abc13be..c4d59c4c3 100644 --- a/lib/src/diff.rs +++ b/lib/src/diff.rs @@ -392,11 +392,13 @@ impl<'input> Diff<'input> { .collect_vec(); unchanged_regions.insert(BaseRange(base_input.len()..base_input.len()), offsets); - Self { + let mut diff = Self { base_input, other_inputs, unchanged_regions, - } + }; + diff.compact_unchanged_regions(); + diff } pub fn unrefined(inputs: &[&'input [u8]]) -> Self { @@ -831,6 +833,23 @@ mod tests { ); } + #[test] + fn test_diff_for_tokenizer_compacted() { + // Tests that unchanged regions are compacted when using for_tokenizer() + let diff = Diff::for_tokenizer( + &[b"a\nb\nc\nd\ne\nf\ng", b"a\nb\nc\nX\ne\nf\ng"], + &find_line_ranges, + ); + assert_eq!( + diff.hunks().collect_vec(), + vec![ + DiffHunk::Matching(b"a\nb\nc\n"), + DiffHunk::Different(vec![b"d\n", b"X\n"]), + DiffHunk::Matching(b"e\nf\ng"), + ] + ); + } + #[test] fn test_diff_nothing_in_common() { assert_eq!(