diff: extract Diff::by_word() function

I'm going to split color-words diffs into by_line() and by_word() stages. Perhaps Diff::default_refinement() can be removed once all non-test callers are migrated.
2024-08-16 16:02:36 +09:00 · 2024-08-16 16:02:36 +09:00 · 2be8e596e2
commit 2be8e596e2
parent f258664a2f
2 changed files with 14 additions and 6 deletions
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@ -25,7 +25,7 @@ use jj_lib::commit::Commit;
 use jj_lib::conflicts::{
    materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue,
 };
-use jj_lib::diff::{self, Diff, DiffHunk};
+use jj_lib::diff::{Diff, DiffHunk};
 use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator};
 use jj_lib::matchers::Matcher;
 use jj_lib::merge::MergedTreeValue;
@ -1025,11 +1025,7 @@ fn inline_diff_hunks<'content>(
    let mut left_tokens: DiffTokenVec<'content> = vec![];
    let mut right_tokens: DiffTokenVec<'content> = vec![];

-    // Like Diff::default_refinement(), but doesn't try to match up contents by
-    // lines. We know left/right_contents have no matching lines.
-    let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges);
-    diff.refine_changed_regions(diff::find_nonword_ranges);
-    for hunk in diff.hunks() {
+    for hunk in Diff::by_word([left_content, right_content]).hunks() {
        match hunk {
            DiffHunk::Matching(content) => {
                for token in content.split_inclusive(|b| *b == b'\n') {
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@ -493,6 +493,18 @@ impl<'input> Diff<'input> {
        Diff::for_tokenizer(inputs, find_line_ranges)
    }

+    /// Compares `inputs` word by word.
+    ///
+    /// `inputs` is usually a changed hunk (e.g. a `DiffHunk::Different`)
+    /// that was produced by a line-by-line diff.
+    pub fn by_word<T: AsRef<[u8]> + ?Sized + 'input>(
+        inputs: impl IntoIterator<Item = &'input T>,
+    ) -> Self {
+        let mut diff = Diff::for_tokenizer(inputs, find_word_ranges);
+        diff.refine_changed_regions(find_nonword_ranges);
+        diff
+    }
+
    // TODO: At least when merging, it's wasteful to refine the diff if, e.g., 2
    // out of 3 inputs match in the differing regions. Perhaps the refine()
    // method should be on the hunk instead (probably returning a new Diff)?