From 2be8e596e26ff974d50832f15a5df42d1348d3d5 Mon Sep 17 00:00:00 2001 From: Yuya Nishihara Date: Fri, 16 Aug 2024 16:02:36 +0900 Subject: [PATCH] diff: extract Diff::by_word() function I'm going to split color-words diffs to by_line() and by_word() stages. Perhaps, Diff::default_refinement() can be removed once all non-test callers are migrated. --- cli/src/diff_util.rs | 8 ++------ lib/src/diff.rs | 12 ++++++++++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs index 63a79210b..9b37f029d 100644 --- a/cli/src/diff_util.rs +++ b/cli/src/diff_util.rs @@ -25,7 +25,7 @@ use jj_lib::commit::Commit; use jj_lib::conflicts::{ materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue, }; -use jj_lib::diff::{self, Diff, DiffHunk}; +use jj_lib::diff::{Diff, DiffHunk}; use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator}; use jj_lib::matchers::Matcher; use jj_lib::merge::MergedTreeValue; @@ -1025,11 +1025,7 @@ fn inline_diff_hunks<'content>( let mut left_tokens: DiffTokenVec<'content> = vec![]; let mut right_tokens: DiffTokenVec<'content> = vec![]; - // Like Diff::default_refinement(), but doesn't try to match up contents by - // lines. We know left/right_contents have no matching lines. - let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges); - diff.refine_changed_regions(diff::find_nonword_ranges); - for hunk in diff.hunks() { + for hunk in Diff::by_word([left_content, right_content]).hunks() { match hunk { DiffHunk::Matching(content) => { for token in content.split_inclusive(|b| *b == b'\n') { diff --git a/lib/src/diff.rs b/lib/src/diff.rs index 5daa918e4..a56b3af78 100644 --- a/lib/src/diff.rs +++ b/lib/src/diff.rs @@ -493,6 +493,18 @@ impl<'input> Diff<'input> { Diff::for_tokenizer(inputs, find_line_ranges) } + /// Compares `inputs` word by word. + /// + /// The `inputs` is usually a changed hunk (e.g. 
a `DiffHunk::Different`) + /// that was the output from a line-by-line diff. + pub fn by_word<T: AsRef<[u8]> + ?Sized + 'input>( + inputs: impl IntoIterator<Item = &'input T>, + ) -> Self { + let mut diff = Diff::for_tokenizer(inputs, find_word_ranges); + diff.refine_changed_regions(find_nonword_ranges); + diff + } + // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2 // out of 3 inputs match in the differing regions. Perhaps the refine() // method should be on the hunk instead (probably returning a new Diff)?