diff: compact adjacent unchanged regions also when using for_tokenizer()

I noticed while working on support for unified diffs (#33) that `Diff::for_tokenizer(..., &find_line_ranges)` would return a separate `DiffHunk::Matching` for each matching line instead of a single `DiffHunk::Matching` covering each run of adjacent matching lines. That's different from what you get from `Diff::default_refinement()` and seems less convenient to work with, so this commit compacts the unchanged regions in `for_tokenizer()` as well.
2024-10-23 23:10:01 +00:00 · 2021-10-09 23:41:17 -07:00 · 2021-10-09 23:41:17 -07:00 · e52c902d3a
commit e52c902d3a
parent d92b29cca6
1 changed files with 21 additions and 2 deletions
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@@ -392,11 +392,13 @@ impl<'input> Diff<'input> {
            .collect_vec();
        unchanged_regions.insert(BaseRange(base_input.len()..base_input.len()), offsets);

-        Self {
+        let mut diff = Self {
            base_input,
            other_inputs,
            unchanged_regions,
-        }
+        };
+        diff.compact_unchanged_regions();
+        diff
    }

    pub fn unrefined(inputs: &[&'input [u8]]) -> Self {
@@ -831,6 +833,23 @@ mod tests {
        );
    }

+    #[test]
+    fn test_diff_for_tokenizer_compacted() {
+        // Tests that unchanged regions are compacted when using for_tokenizer()
+        let diff = Diff::for_tokenizer(
+            &[b"a\nb\nc\nd\ne\nf\ng", b"a\nb\nc\nX\ne\nf\ng"],
+            &find_line_ranges,
+        );
+        assert_eq!(
+            diff.hunks().collect_vec(),
+            vec![
+                DiffHunk::Matching(b"a\nb\nc\n"),
+                DiffHunk::Different(vec![b"d\n", b"X\n"]),
+                DiffHunk::Matching(b"e\nf\ng"),
+            ]
+        );
+    }
+
    #[test]
    fn test_diff_nothing_in_common() {
        assert_eq!(