ok/jj
1
0
Fork 0
forked from mirrors/jj

diff: extract Diff::by_word() function

I'm going to split color-words diffs into by_line() and by_word() stages.

Perhaps Diff::default_refinement() can be removed once all non-test callers
are migrated.
This commit is contained in:
Yuya Nishihara 2024-08-16 16:02:36 +09:00
parent f258664a2f
commit 2be8e596e2
2 changed files with 14 additions and 6 deletions

View file

@ -25,7 +25,7 @@ use jj_lib::commit::Commit;
use jj_lib::conflicts::{ use jj_lib::conflicts::{
materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue, materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue,
}; };
use jj_lib::diff::{self, Diff, DiffHunk}; use jj_lib::diff::{Diff, DiffHunk};
use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator}; use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator};
use jj_lib::matchers::Matcher; use jj_lib::matchers::Matcher;
use jj_lib::merge::MergedTreeValue; use jj_lib::merge::MergedTreeValue;
@ -1025,11 +1025,7 @@ fn inline_diff_hunks<'content>(
let mut left_tokens: DiffTokenVec<'content> = vec![]; let mut left_tokens: DiffTokenVec<'content> = vec![];
let mut right_tokens: DiffTokenVec<'content> = vec![]; let mut right_tokens: DiffTokenVec<'content> = vec![];
// Like Diff::default_refinement(), but doesn't try to match up contents by for hunk in Diff::by_word([left_content, right_content]).hunks() {
// lines. We know left/right_contents have no matching lines.
let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges);
diff.refine_changed_regions(diff::find_nonword_ranges);
for hunk in diff.hunks() {
match hunk { match hunk {
DiffHunk::Matching(content) => { DiffHunk::Matching(content) => {
for token in content.split_inclusive(|b| *b == b'\n') { for token in content.split_inclusive(|b| *b == b'\n') {

View file

@ -493,6 +493,18 @@ impl<'input> Diff<'input> {
Diff::for_tokenizer(inputs, find_line_ranges) Diff::for_tokenizer(inputs, find_line_ranges)
} }
/// Builds a word-by-word diff of `inputs`.
///
/// The inputs are first tokenized with `find_word_ranges`, and the regions
/// that still differ are then refined with `find_nonword_ranges`. No attempt
/// is made to match up contents by lines.
///
/// Callers usually pass the sides of a changed hunk (e.g. a
/// `DiffHunk::Different`) that came out of a line-by-line diff.
pub fn by_word<T: AsRef<[u8]> + ?Sized + 'input>(
    inputs: impl IntoIterator<Item = &'input T>,
) -> Self {
    // First pass: compare the inputs using word tokens.
    let mut word_diff = Diff::for_tokenizer(inputs, find_word_ranges);
    // Second pass: refine the remaining changed regions using non-word tokens.
    word_diff.refine_changed_regions(find_nonword_ranges);
    word_diff
}
// TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2 // TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
// out of 3 inputs match in the differing regions. Perhaps the refine() // out of 3 inputs match in the differing regions. Perhaps the refine()
// method should be on the hunk instead (probably returning a new Diff)? // method should be on the hunk instead (probably returning a new Diff)?