diff: compact adjacent unchanged regions also when using for_tokenizer()

I noticed while working on support for unified diffs (#33) that
`Diff::for_tokenizer(..., &find_line_ranges)` would return a
`DiffHunk::Matching` for each matching line instead of a single
`DiffHunk::Matching` for all the matching lines. That's different from
what you get from `Diff::default_refinement()` and seems less
convenient to work with.
This commit is contained in:
Martin von Zweigbergk 2021-10-09 23:41:17 -07:00
parent d92b29cca6
commit e52c902d3a

View file

@@ -392,11 +392,13 @@ impl<'input> Diff<'input> {
.collect_vec();
unchanged_regions.insert(BaseRange(base_input.len()..base_input.len()), offsets);
Self {
let mut diff = Self {
base_input,
other_inputs,
unchanged_regions,
}
};
diff.compact_unchanged_regions();
diff
}
pub fn unrefined(inputs: &[&'input [u8]]) -> Self {
@@ -831,6 +833,23 @@ mod tests {
);
}
#[test]
fn test_diff_for_tokenizer_compacted() {
    // Checks that for_tokenizer() merges adjacent unchanged regions: the two
    // inputs differ only in their fourth line ("d" vs "X"), so each run of
    // matching lines around it must come back as a single `Matching` hunk
    // rather than one hunk per line.
    let inputs: [&[u8]; 2] = [b"a\nb\nc\nd\ne\nf\ng", b"a\nb\nc\nX\ne\nf\ng"];
    let expected_hunks = vec![
        DiffHunk::Matching(b"a\nb\nc\n"),
        DiffHunk::Different(vec![b"d\n", b"X\n"]),
        DiffHunk::Matching(b"e\nf\ng"),
    ];

    let line_diff = Diff::for_tokenizer(&inputs, &find_line_ranges);
    let actual_hunks = line_diff.hunks().collect_vec();

    assert_eq!(actual_hunks, expected_hunks);
}
#[test]
fn test_diff_nothing_in_common() {
assert_eq!(