forked from mirrors/jj
diff: extract Diff::by_word() function
I'm going to split color-words diffs into by_line() and by_word() stages. Perhaps Diff::default_refinement() can be removed once all non-test callers are migrated.
This commit is contained in:
parent
f258664a2f
commit
2be8e596e2
2 changed files with 14 additions and 6 deletions
|
@ -25,7 +25,7 @@ use jj_lib::commit::Commit;
|
|||
use jj_lib::conflicts::{
|
||||
materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue,
|
||||
};
|
||||
use jj_lib::diff::{self, Diff, DiffHunk};
|
||||
use jj_lib::diff::{Diff, DiffHunk};
|
||||
use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator};
|
||||
use jj_lib::matchers::Matcher;
|
||||
use jj_lib::merge::MergedTreeValue;
|
||||
|
@ -1025,11 +1025,7 @@ fn inline_diff_hunks<'content>(
|
|||
let mut left_tokens: DiffTokenVec<'content> = vec![];
|
||||
let mut right_tokens: DiffTokenVec<'content> = vec![];
|
||||
|
||||
// Like Diff::default_refinement(), but doesn't try to match up contents by
|
||||
// lines. We know left/right_contents have no matching lines.
|
||||
let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges);
|
||||
diff.refine_changed_regions(diff::find_nonword_ranges);
|
||||
for hunk in diff.hunks() {
|
||||
for hunk in Diff::by_word([left_content, right_content]).hunks() {
|
||||
match hunk {
|
||||
DiffHunk::Matching(content) => {
|
||||
for token in content.split_inclusive(|b| *b == b'\n') {
|
||||
|
|
|
@ -493,6 +493,18 @@ impl<'input> Diff<'input> {
|
|||
Diff::for_tokenizer(inputs, find_line_ranges)
|
||||
}
|
||||
|
||||
/// Compares `inputs` word by word.
|
||||
///
|
||||
/// `inputs` is usually a changed hunk (e.g. a `DiffHunk::Different`)
|
||||
/// that was the output from a line-by-line diff.
///
/// The diff is first built with `find_word_ranges` as the tokenizer, and
/// the regions that still differ are then refined with
/// `find_nonword_ranges`. Unlike the line-based constructor, no attempt is
/// made to match up contents by lines.
|
||||
pub fn by_word<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||
inputs: impl IntoIterator<Item = &'input T>,
|
||||
) -> Self {
|
||||
let mut diff = Diff::for_tokenizer(inputs, find_word_ranges);
|
||||
diff.refine_changed_regions(find_nonword_ranges);
|
||||
diff
|
||||
}
|
||||
|
||||
// TODO: At least when merging, it's wasteful to refine the diff if, e.g., 2
|
||||
// out of 3 inputs match in the differing regions. Perhaps the refine()
|
||||
// method should be on the hunk instead (probably returning a new Diff)?
|
||||
|
|
Loading…
Reference in a new issue