forked from mirrors/jj
diff: extract Diff::by_word() function
I'm going to split color-words diffs into by_line() and by_word() stages. Diff::default_refinement() can perhaps be removed once all non-test callers are migrated.
This commit is contained in:
parent
f258664a2f
commit
2be8e596e2
2 changed files with 14 additions and 6 deletions
|
@ -25,7 +25,7 @@ use jj_lib::commit::Commit;
|
||||||
use jj_lib::conflicts::{
|
use jj_lib::conflicts::{
|
||||||
materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue,
|
materialized_diff_stream, MaterializedTreeDiffEntry, MaterializedTreeValue,
|
||||||
};
|
};
|
||||||
use jj_lib::diff::{self, Diff, DiffHunk};
|
use jj_lib::diff::{Diff, DiffHunk};
|
||||||
use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator};
|
use jj_lib::files::{DiffLine, DiffLineHunkSide, DiffLineIterator};
|
||||||
use jj_lib::matchers::Matcher;
|
use jj_lib::matchers::Matcher;
|
||||||
use jj_lib::merge::MergedTreeValue;
|
use jj_lib::merge::MergedTreeValue;
|
||||||
|
@ -1025,11 +1025,7 @@ fn inline_diff_hunks<'content>(
|
||||||
let mut left_tokens: DiffTokenVec<'content> = vec![];
|
let mut left_tokens: DiffTokenVec<'content> = vec![];
|
||||||
let mut right_tokens: DiffTokenVec<'content> = vec![];
|
let mut right_tokens: DiffTokenVec<'content> = vec![];
|
||||||
|
|
||||||
// Like Diff::default_refinement(), but doesn't try to match up contents by
|
for hunk in Diff::by_word([left_content, right_content]).hunks() {
|
||||||
// lines. We know left/right_contents have no matching lines.
|
|
||||||
let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges);
|
|
||||||
diff.refine_changed_regions(diff::find_nonword_ranges);
|
|
||||||
for hunk in diff.hunks() {
|
|
||||||
match hunk {
|
match hunk {
|
||||||
DiffHunk::Matching(content) => {
|
DiffHunk::Matching(content) => {
|
||||||
for token in content.split_inclusive(|b| *b == b'\n') {
|
for token in content.split_inclusive(|b| *b == b'\n') {
|
||||||
|
|
|
@ -493,6 +493,18 @@ impl<'input> Diff<'input> {
|
||||||
Diff::for_tokenizer(inputs, find_line_ranges)
|
Diff::for_tokenizer(inputs, find_line_ranges)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compares `inputs` word by word.
|
||||||
|
///
|
||||||
|
/// The `inputs` is usually a changed hunk (e.g. a `DiffHunk::Different`)
|
||||||
|
/// that was the output from a line-by-line diff.
|
||||||
|
pub fn by_word<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||||
|
inputs: impl IntoIterator<Item = &'input T>,
|
||||||
|
) -> Self {
|
||||||
|
let mut diff = Diff::for_tokenizer(inputs, find_word_ranges);
|
||||||
|
diff.refine_changed_regions(find_nonword_ranges);
|
||||||
|
diff
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
|
// TODO: At least when merging, it's wasteful to refine the diff if e.g. if 2
|
||||||
// out of 3 inputs match in the differing regions. Perhaps the refine()
|
// out of 3 inputs match in the differing regions. Perhaps the refine()
|
||||||
// method should be on the hunk instead (probably returning a new Diff)?
|
// method should be on the hunk instead (probably returning a new Diff)?
|
||||||
|
|
Loading…
Reference in a new issue