mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-15 08:53:16 +00:00
diff: accept diff inputs by generic iterator
This helps migrate internal [u8] variables to BStr. The b"" literals in tests are changed to &str to get around potential type incompatibility between &[u8; N] literals of different lengths N.
This commit is contained in:
parent
2ca3bad0ee
commit
59daef2351
5 changed files with 30 additions and 26 deletions
|
@ -852,7 +852,7 @@ fn unified_diff_hunks<'content>(
|
|||
right_line_range: 1..1,
|
||||
lines: vec![],
|
||||
};
|
||||
let diff = Diff::by_line(&[left_content, right_content]);
|
||||
let diff = Diff::by_line([left_content, right_content]);
|
||||
let mut diff_hunks = diff.hunks().peekable();
|
||||
while let Some(hunk) = diff_hunks.next() {
|
||||
match hunk {
|
||||
|
@ -910,7 +910,7 @@ fn inline_diff_hunks<'content>(
|
|||
|
||||
// Like Diff::default_refinement(), but doesn't try to match up contents by
|
||||
// lines. We know left/right_contents have no matching lines.
|
||||
let mut diff = Diff::for_tokenizer(&[left_content, right_content], diff::find_word_ranges);
|
||||
let mut diff = Diff::for_tokenizer([left_content, right_content], diff::find_word_ranges);
|
||||
diff.refine_changed_regions(diff::find_nonword_ranges);
|
||||
for hunk in diff.hunks() {
|
||||
match hunk {
|
||||
|
@ -1128,7 +1128,7 @@ fn get_diff_stat(
|
|||
// TODO: this matches git's behavior, which is to count the number of newlines
|
||||
// in the file. but that behavior seems unhelpful; no one really cares how
|
||||
// many `0x0a` characters are in an image.
|
||||
let diff = Diff::by_line(&[&left_content.contents, &right_content.contents]);
|
||||
let diff = Diff::by_line([&left_content.contents, &right_content.contents]);
|
||||
let mut added = 0;
|
||||
let mut removed = 0;
|
||||
for hunk in diff.hunks() {
|
||||
|
|
|
@ -225,7 +225,7 @@ fn make_diff_sections(
|
|||
left_contents: &str,
|
||||
right_contents: &str,
|
||||
) -> Result<Vec<scm_record::Section<'static>>, BuiltinToolError> {
|
||||
let diff = Diff::by_line(&[left_contents.as_bytes(), right_contents.as_bytes()]);
|
||||
let diff = Diff::by_line([left_contents.as_bytes(), right_contents.as_bytes()]);
|
||||
let mut sections = Vec::new();
|
||||
for hunk in diff.hunks() {
|
||||
match hunk {
|
||||
|
|
|
@ -259,12 +259,12 @@ pub fn materialize_merge_result(
|
|||
output.write_all(&left.0)?;
|
||||
continue;
|
||||
};
|
||||
let diff1 = Diff::by_line(&[&left.0, &right1.0]).hunks().collect_vec();
|
||||
let diff1 = Diff::by_line([&left.0, &right1.0]).hunks().collect_vec();
|
||||
// Check if the diff against the next positive term is better. Since
|
||||
// we want to preserve the order of the terms, we don't match against
|
||||
// any later positive terms.
|
||||
if let Some(right2) = hunk.get_add(add_index + 1) {
|
||||
let diff2 = Diff::by_line(&[&left.0, &right2.0]).hunks().collect_vec();
|
||||
let diff2 = Diff::by_line([&left.0, &right2.0]).hunks().collect_vec();
|
||||
if diff_size(&diff2) < diff_size(&diff1) {
|
||||
// If the next positive term is a better match, emit
|
||||
// the current positive term as a snapshot and the next
|
||||
|
|
|
@ -408,13 +408,13 @@ fn intersect_regions(
|
|||
}
|
||||
|
||||
impl<'input> Diff<'input> {
|
||||
pub fn for_tokenizer(
|
||||
inputs: &[&'input [u8]],
|
||||
pub fn for_tokenizer<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||
inputs: impl IntoIterator<Item = &'input T>,
|
||||
tokenizer: impl Fn(&[u8]) -> Vec<Range<usize>>,
|
||||
) -> Self {
|
||||
assert!(!inputs.is_empty());
|
||||
let base_input = inputs[0];
|
||||
let other_inputs = inputs.iter().skip(1).copied().collect_vec();
|
||||
let mut inputs = inputs.into_iter().map(AsRef::as_ref);
|
||||
let base_input = inputs.next().expect("inputs must not be empty");
|
||||
let other_inputs = inputs.collect_vec();
|
||||
// First tokenize each input
|
||||
let base_token_ranges = tokenizer(base_input);
|
||||
let other_token_ranges = other_inputs
|
||||
|
@ -471,12 +471,16 @@ impl<'input> Diff<'input> {
|
|||
diff
|
||||
}
|
||||
|
||||
pub fn unrefined(inputs: &[&'input [u8]]) -> Self {
|
||||
pub fn unrefined<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||
inputs: impl IntoIterator<Item = &'input T>,
|
||||
) -> Self {
|
||||
Diff::for_tokenizer(inputs, |_| vec![])
|
||||
}
|
||||
|
||||
/// Compares `inputs` line by line.
|
||||
pub fn by_line(inputs: &[&'input [u8]]) -> Self {
|
||||
pub fn by_line<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||
inputs: impl IntoIterator<Item = &'input T>,
|
||||
) -> Self {
|
||||
Diff::for_tokenizer(inputs, find_line_ranges)
|
||||
}
|
||||
|
||||
|
@ -486,7 +490,9 @@ impl<'input> Diff<'input> {
|
|||
// That would let each user decide which hunks to refine. However, it would
|
||||
// probably mean that many callers repeat the same code. Perhaps it
|
||||
// should be possible to refine a whole diff *or* individual hunks.
|
||||
pub fn default_refinement(inputs: &[&'input [u8]]) -> Self {
|
||||
pub fn default_refinement<T: AsRef<[u8]> + ?Sized + 'input>(
|
||||
inputs: impl IntoIterator<Item = &'input T>,
|
||||
) -> Self {
|
||||
let mut diff = Diff::for_tokenizer(inputs, find_line_ranges);
|
||||
diff.refine_changed_regions(find_word_ranges);
|
||||
diff.refine_changed_regions(find_nonword_ranges);
|
||||
|
@ -526,7 +532,7 @@ impl<'input> Diff<'input> {
|
|||
slices.push(&self.other_inputs[i][changed_range]);
|
||||
}
|
||||
|
||||
let refined_diff = Diff::for_tokenizer(&slices, &tokenizer);
|
||||
let refined_diff = Diff::for_tokenizer(slices, &tokenizer);
|
||||
|
||||
for UnchangedRange {
|
||||
base_range,
|
||||
|
@ -663,7 +669,7 @@ pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> Vec<DiffHunk<'a>> {
|
|||
return vec![DiffHunk::Different(vec![left, b""])];
|
||||
}
|
||||
|
||||
Diff::default_refinement(&[left, right])
|
||||
Diff::default_refinement([left, right])
|
||||
.hunks()
|
||||
.collect_vec()
|
||||
}
|
||||
|
@ -967,19 +973,19 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_diff_single_input() {
|
||||
let diff = Diff::default_refinement(&[b"abc"]);
|
||||
let diff = Diff::default_refinement(["abc"]);
|
||||
assert_eq!(diff.hunks().collect_vec(), vec![DiffHunk::Matching(b"abc")]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_diff_single_empty_input() {
|
||||
let diff = Diff::default_refinement(&[b""]);
|
||||
let diff = Diff::default_refinement([""]);
|
||||
assert_eq!(diff.hunks().collect_vec(), vec![]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_diff_two_inputs_one_different() {
|
||||
let diff = Diff::default_refinement(&[b"a b c", b"a X c"]);
|
||||
let diff = Diff::default_refinement(["a b c", "a X c"]);
|
||||
assert_eq!(
|
||||
diff.hunks().collect_vec(),
|
||||
vec![
|
||||
|
@ -992,7 +998,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_diff_multiple_inputs_one_different() {
|
||||
let diff = Diff::default_refinement(&[b"a b c", b"a X c", b"a b c"]);
|
||||
let diff = Diff::default_refinement(["a b c", "a X c", "a b c"]);
|
||||
assert_eq!(
|
||||
diff.hunks().collect_vec(),
|
||||
vec![
|
||||
|
@ -1005,7 +1011,7 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_diff_multiple_inputs_all_different() {
|
||||
let diff = Diff::default_refinement(&[b"a b c", b"a X c", b"a c X"]);
|
||||
let diff = Diff::default_refinement(["a b c", "a X c", "a c X"]);
|
||||
assert_eq!(
|
||||
diff.hunks().collect_vec(),
|
||||
vec![
|
||||
|
@ -1021,7 +1027,7 @@ mod tests {
|
|||
fn test_diff_for_tokenizer_compacted() {
|
||||
// Tests that unchanged regions are compacted when using for_tokenizer()
|
||||
let diff = Diff::for_tokenizer(
|
||||
&[b"a\nb\nc\nd\ne\nf\ng", b"a\nb\nc\nX\ne\nf\ng"],
|
||||
["a\nb\nc\nd\ne\nf\ng", "a\nb\nc\nX\ne\nf\ng"],
|
||||
find_line_ranges,
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
use std::collections::VecDeque;
|
||||
use std::fmt::{Debug, Error, Formatter};
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
use crate::diff;
|
||||
use crate::diff::{Diff, DiffHunk};
|
||||
use crate::merge::{trivial_merge, Merge};
|
||||
|
@ -163,9 +161,9 @@ pub fn merge(slices: &Merge<&[u8]>) -> MergeResult {
|
|||
// usually done for 3-way conflicts. Are there better heuristics when there are
|
||||
// more than 3 parts?
|
||||
let num_diffs = slices.removes().len();
|
||||
let diff_inputs = slices.removes().chain(slices.adds()).copied().collect_vec();
|
||||
let diff_inputs = slices.removes().chain(slices.adds());
|
||||
|
||||
let diff = Diff::by_line(&diff_inputs);
|
||||
let diff = Diff::by_line(diff_inputs);
|
||||
let mut resolved_hunk = ContentHunk(vec![]);
|
||||
let mut merge_hunks: Vec<Merge<ContentHunk>> = vec![];
|
||||
for diff_hunk in diff.hunks() {
|
||||
|
|
Loading…
Reference in a new issue