2020-12-12 08:00:42 +00:00
|
|
|
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
|
2021-04-07 06:33:38 +00:00
|
|
|
use std::collections::VecDeque;
|
2020-12-12 08:00:42 +00:00
|
|
|
use std::fmt::{Debug, Error, Formatter};
|
2021-01-19 06:00:40 +00:00
|
|
|
use std::ops::Range;
|
2020-12-12 08:00:42 +00:00
|
|
|
|
2021-03-26 16:52:05 +00:00
|
|
|
use crate::diff;
|
2021-04-07 06:33:38 +00:00
|
|
|
use crate::diff::SliceDiff;
|
2020-12-12 08:00:42 +00:00
|
|
|
|
|
|
|
/// A fragment of a single diffed line, tagged with how it relates to the two
/// inputs being compared.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum DiffHunk<'a> {
    /// Bytes that are present on both sides.
    Unmodified(&'a [u8]),
    /// Bytes that are present only on the right side.
    Added(&'a [u8]),
    /// Bytes that are present only on the left side.
    Removed(&'a [u8]),
}
|
|
|
|
|
|
|
|
#[derive(PartialEq, Eq, Clone, Debug)]
|
2021-01-18 08:53:44 +00:00
|
|
|
pub struct DiffLine<'a> {
|
2020-12-12 08:00:42 +00:00
|
|
|
pub left_line_number: u32,
|
|
|
|
pub right_line_number: u32,
|
|
|
|
pub has_left_content: bool,
|
|
|
|
pub has_right_content: bool,
|
2021-01-18 08:53:44 +00:00
|
|
|
pub hunks: Vec<DiffHunk<'a>>,
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
|
|
|
|
2021-01-18 08:53:44 +00:00
|
|
|
impl DiffLine<'_> {
|
2020-12-12 08:00:42 +00:00
|
|
|
fn reset_line(&mut self) {
|
|
|
|
self.has_left_content = false;
|
|
|
|
self.has_right_content = false;
|
|
|
|
self.hunks.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn is_unmodified(&self) -> bool {
|
|
|
|
self.hunks
|
|
|
|
.iter()
|
|
|
|
.all(|hunk| matches!(hunk, DiffHunk::Unmodified(_)))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-04-07 06:33:38 +00:00
|
|
|
pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> DiffLineIterator<'a> {
|
|
|
|
let slice_diffs = diff::diff(left, right);
|
|
|
|
DiffLineIterator::new(slice_diffs)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub struct DiffLineIterator<'a> {
|
|
|
|
slice_diffs: Vec<SliceDiff<'a>>,
|
|
|
|
current_pos: usize,
|
|
|
|
current_line: DiffLine<'a>,
|
|
|
|
queued_lines: VecDeque<DiffLine<'a>>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> DiffLineIterator<'a> {
|
|
|
|
fn new(slice_diffs: Vec<SliceDiff<'a>>) -> Self {
|
|
|
|
let current_line = DiffLine {
|
|
|
|
left_line_number: 1,
|
|
|
|
right_line_number: 1,
|
|
|
|
has_left_content: false,
|
|
|
|
has_right_content: false,
|
|
|
|
hunks: vec![],
|
|
|
|
};
|
|
|
|
DiffLineIterator {
|
|
|
|
slice_diffs,
|
|
|
|
current_pos: 0,
|
|
|
|
current_line,
|
|
|
|
queued_lines: VecDeque::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> Iterator for DiffLineIterator<'a> {
|
|
|
|
type Item = DiffLine<'a>;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
// TODO: Should we attempt to interpret as utf-8 and otherwise break only at
|
|
|
|
// newlines?
|
|
|
|
while self.current_pos < self.slice_diffs.len() && self.queued_lines.is_empty() {
|
|
|
|
let hunk = &self.slice_diffs[self.current_pos];
|
|
|
|
self.current_pos += 1;
|
|
|
|
match hunk {
|
|
|
|
diff::SliceDiff::Unchanged(text) => {
|
|
|
|
let lines = text.split_inclusive(|b| *b == b'\n');
|
|
|
|
for line in lines {
|
|
|
|
self.current_line.has_left_content = true;
|
|
|
|
self.current_line.has_right_content = true;
|
|
|
|
self.current_line.hunks.push(DiffHunk::Unmodified(line));
|
|
|
|
if line.ends_with(b"\n") {
|
|
|
|
self.queued_lines.push_back(self.current_line.clone());
|
|
|
|
self.current_line.left_line_number += 1;
|
|
|
|
self.current_line.right_line_number += 1;
|
|
|
|
self.current_line.reset_line();
|
|
|
|
}
|
2021-03-26 16:52:05 +00:00
|
|
|
}
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
2021-04-07 06:33:38 +00:00
|
|
|
diff::SliceDiff::Replaced(left, right) => {
|
|
|
|
let left_lines = left.split_inclusive(|b| *b == b'\n');
|
|
|
|
for left_line in left_lines {
|
|
|
|
self.current_line.has_left_content = true;
|
|
|
|
self.current_line.hunks.push(DiffHunk::Removed(left_line));
|
|
|
|
if left_line.ends_with(b"\n") {
|
|
|
|
self.queued_lines.push_back(self.current_line.clone());
|
|
|
|
self.current_line.left_line_number += 1;
|
|
|
|
self.current_line.reset_line();
|
|
|
|
}
|
2021-03-26 16:52:05 +00:00
|
|
|
}
|
2021-04-07 06:33:38 +00:00
|
|
|
let right_lines = right.split_inclusive(|b| *b == b'\n');
|
|
|
|
for right_line in right_lines {
|
|
|
|
self.current_line.has_right_content = true;
|
|
|
|
self.current_line.hunks.push(DiffHunk::Added(right_line));
|
|
|
|
if right_line.ends_with(b"\n") {
|
|
|
|
self.queued_lines.push_back(self.current_line.clone());
|
|
|
|
self.current_line.right_line_number += 1;
|
|
|
|
self.current_line.reset_line();
|
|
|
|
}
|
2021-03-26 16:52:05 +00:00
|
|
|
}
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-04-07 06:33:38 +00:00
|
|
|
|
|
|
|
if let Some(line) = self.queued_lines.pop_front() {
|
|
|
|
return Some(line);
|
|
|
|
}
|
|
|
|
|
|
|
|
if !self.current_line.hunks.is_empty() {
|
|
|
|
let line = self.current_line.clone();
|
|
|
|
self.current_line.reset_line();
|
|
|
|
return Some(line);
|
|
|
|
}
|
|
|
|
|
|
|
|
None
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// One section of a 3-way merge result.
#[derive(Clone, PartialEq, Eq)]
pub enum MergeHunk {
    /// Content for which the merge produced a single result.
    Resolved(Vec<u8>),
    /// Content that differs between the base and both sides, so no single
    /// result could be chosen.
    Conflict {
        /// The conflicting section as it appears in the base.
        base: Vec<u8>,
        /// The conflicting section as it appears on the left side.
        left: Vec<u8>,
        /// The conflicting section as it appears on the right side.
        right: Vec<u8>,
    },
}
|
|
|
|
|
|
|
|
impl Debug for MergeHunk {
|
|
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
|
|
|
|
match self {
|
|
|
|
MergeHunk::Resolved(data) => f
|
|
|
|
.debug_tuple("Resolved")
|
|
|
|
.field(&String::from_utf8_lossy(data))
|
|
|
|
.finish(),
|
|
|
|
MergeHunk::Conflict { base, left, right } => f
|
|
|
|
.debug_struct("Conflict")
|
|
|
|
.field("base", &String::from_utf8_lossy(base))
|
|
|
|
.field("left", &String::from_utf8_lossy(left))
|
|
|
|
.field("right", &String::from_utf8_lossy(right))
|
|
|
|
.finish(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, PartialEq, Eq, Clone)]
|
|
|
|
pub enum MergeResult {
|
|
|
|
Resolved(Vec<u8>),
|
|
|
|
Conflict(Vec<MergeHunk>),
|
|
|
|
}
|
|
|
|
|
2021-01-19 06:00:40 +00:00
|
|
|
/// A region where the base and two sides match.
///
/// Each field is the byte range of that matching region within the
/// corresponding input.
#[derive(Clone, Debug, PartialEq, Eq)]
struct SyncRegion {
    /// Byte range of the region in the base.
    base: Range<usize>,
    /// Byte range of the region in the left side.
    left: Range<usize>,
    /// Byte range of the region in the right side.
    right: Range<usize>,
}
|
|
|
|
|
|
|
|
fn find_sync_regions(base: &[u8], left: &[u8], right: &[u8]) -> Vec<SyncRegion> {
|
2021-03-26 15:44:18 +00:00
|
|
|
let base_tokens = crate::diff::find_line_ranges(base);
|
|
|
|
let left_tokens = crate::diff::find_line_ranges(left);
|
|
|
|
let right_tokens = crate::diff::find_line_ranges(right);
|
2020-12-12 08:00:42 +00:00
|
|
|
|
2021-03-26 15:44:18 +00:00
|
|
|
let left_regions = crate::diff::unchanged_ranges(base, left, &base_tokens, &left_tokens);
|
|
|
|
let right_regions = crate::diff::unchanged_ranges(base, right, &base_tokens, &right_tokens);
|
2021-01-19 06:00:40 +00:00
|
|
|
|
|
|
|
let mut left_it = left_regions.iter().peekable();
|
|
|
|
let mut right_it = right_regions.iter().peekable();
|
2020-12-12 08:00:42 +00:00
|
|
|
|
2021-01-19 06:00:40 +00:00
|
|
|
let mut regions: Vec<SyncRegion> = vec![];
|
2021-03-26 15:44:18 +00:00
|
|
|
while let (Some((left_base_range, left_range)), Some((right_base_range, right_range))) =
|
2021-01-19 06:00:40 +00:00
|
|
|
(left_it.peek(), right_it.peek())
|
|
|
|
{
|
2021-03-26 15:44:18 +00:00
|
|
|
// TODO: if left_base_range and right_base_range at least intersect, use the
|
2021-01-19 06:00:40 +00:00
|
|
|
// intersection of the two regions.
|
2021-03-26 15:44:18 +00:00
|
|
|
if left_base_range == right_base_range {
|
2021-01-19 06:00:40 +00:00
|
|
|
regions.push(SyncRegion {
|
2021-03-26 15:44:18 +00:00
|
|
|
base: left_base_range.clone(),
|
|
|
|
left: left_range.clone(),
|
|
|
|
right: right_range.clone(),
|
2021-01-19 06:00:40 +00:00
|
|
|
});
|
|
|
|
left_it.next().unwrap();
|
|
|
|
right_it.next().unwrap();
|
2021-03-26 15:44:18 +00:00
|
|
|
} else if left_base_range.start < right_base_range.start {
|
2021-01-19 06:00:40 +00:00
|
|
|
left_it.next().unwrap();
|
|
|
|
} else {
|
|
|
|
right_it.next().unwrap();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
regions.push(SyncRegion {
|
|
|
|
base: (base.len()..base.len()),
|
|
|
|
left: (left.len()..left.len()),
|
|
|
|
right: (right.len()..right.len()),
|
|
|
|
});
|
|
|
|
regions
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn merge(base: &[u8], left: &[u8], right: &[u8]) -> MergeResult {
|
|
|
|
let mut previous_region = SyncRegion {
|
|
|
|
base: 0..0,
|
|
|
|
left: 0..0,
|
|
|
|
right: 0..0,
|
|
|
|
};
|
2020-12-12 08:00:42 +00:00
|
|
|
let mut hunk: Vec<u8> = vec![];
|
|
|
|
let mut hunks: Vec<MergeHunk> = vec![];
|
2021-01-19 06:00:40 +00:00
|
|
|
// Find regions that match between base, left, and right. Emit the unchanged
|
|
|
|
// regions as is. For the potentially conflicting regions between them, use
|
|
|
|
// one side if the other is changed. If all three sides are different, emit
|
|
|
|
// a conflict.
|
|
|
|
for sync_region in find_sync_regions(base, left, right) {
|
|
|
|
let base_conflict_slice = &base[previous_region.base.end..sync_region.base.start];
|
|
|
|
let left_conflict_slice = &left[previous_region.left.end..sync_region.left.start];
|
|
|
|
let right_conflict_slice = &right[previous_region.right.end..sync_region.right.start];
|
|
|
|
if left_conflict_slice == base_conflict_slice || left_conflict_slice == right_conflict_slice
|
|
|
|
{
|
|
|
|
hunk.extend(right_conflict_slice);
|
|
|
|
} else if right_conflict_slice == base_conflict_slice {
|
|
|
|
hunk.extend(left_conflict_slice);
|
|
|
|
} else {
|
|
|
|
if !hunk.is_empty() {
|
|
|
|
hunks.push(MergeHunk::Resolved(hunk));
|
|
|
|
hunk = vec![];
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
2021-01-19 06:00:40 +00:00
|
|
|
hunks.push(MergeHunk::Conflict {
|
|
|
|
base: base_conflict_slice.to_vec(),
|
|
|
|
left: left_conflict_slice.to_vec(),
|
|
|
|
right: right_conflict_slice.to_vec(),
|
|
|
|
});
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
2021-01-19 06:00:40 +00:00
|
|
|
hunk.extend(base[sync_region.base.clone()].to_vec());
|
|
|
|
previous_region = sync_region;
|
2020-12-12 08:00:42 +00:00
|
|
|
}
|
2021-01-19 06:00:40 +00:00
|
|
|
|
2020-12-12 08:00:42 +00:00
|
|
|
if hunks.is_empty() {
|
|
|
|
MergeResult::Resolved(hunk)
|
|
|
|
} else {
|
|
|
|
if !hunk.is_empty() {
|
|
|
|
hunks.push(MergeHunk::Resolved(hunk));
|
|
|
|
}
|
|
|
|
MergeResult::Conflict(hunks)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SyncRegion` from its three byte ranges.
    fn region(
        base: std::ops::Range<usize>,
        left: std::ops::Range<usize>,
        right: std::ops::Range<usize>,
    ) -> SyncRegion {
        SyncRegion { base, left, right }
    }

    /// Builds a fully resolved merge result holding `content`.
    fn resolved(content: &[u8]) -> MergeResult {
        MergeResult::Resolved(content.to_vec())
    }

    /// Builds a conflict hunk from the three versions of a section.
    fn conflict(base: &[u8], left: &[u8], right: &[u8]) -> MergeHunk {
        MergeHunk::Conflict {
            base: base.to_vec(),
            left: left.to_vec(),
            right: right.to_vec(),
        }
    }

    #[test]
    fn test_find_sync_regions() {
        // Empty inputs yield only the terminating empty region.
        assert_eq!(find_sync_regions(b"", b"", b""), vec![region(0..0, 0..0, 0..0)]);

        // A line is inserted on each side at a different position.
        assert_eq!(
            find_sync_regions(b"a\nb\nc\n", b"a\nx\nb\nc\n", b"a\nb\ny\nc\n"),
            vec![
                region(0..2, 0..2, 0..2),
                region(2..4, 4..6, 2..4),
                region(4..6, 6..8, 6..8),
                region(6..6, 8..8, 8..8),
            ]
        );
    }

    #[test]
    fn test_merge() {
        // No changes, or a change on only one side (or the same on both).
        assert_eq!(merge(b"", b"", b""), resolved(b""));
        assert_eq!(merge(b"a", b"a", b"a"), resolved(b"a"));
        assert_eq!(merge(b"a", b"", b"a"), resolved(b""));
        assert_eq!(merge(b"a", b"a", b""), resolved(b""));
        assert_eq!(merge(b"a", b"", b""), resolved(b""));
        assert_eq!(merge(b"a", b"a b", b"a"), resolved(b"a b"));
        assert_eq!(merge(b"a", b"a", b"a b"), resolved(b"a b"));

        // Both sides append a different line after a shared one.
        assert_eq!(
            merge(b"a\n", b"a\nb\n", b"a\nc\n"),
            MergeResult::Conflict(vec![
                MergeHunk::Resolved(b"a\n".to_vec()),
                conflict(b"", b"b\n", b"c\n"),
            ])
        );

        // A replacement on only one side resolves to that side.
        assert_eq!(merge(b"a", b"b", b"a"), resolved(b"b"));
        assert_eq!(merge(b"a", b"a", b"b"), resolved(b"b"));

        // Both sides changed the same content differently.
        assert_eq!(
            merge(b"a", b"", b"b"),
            MergeResult::Conflict(vec![conflict(b"a", b"", b"b")])
        );
        assert_eq!(
            merge(b"a", b"b", b""),
            MergeResult::Conflict(vec![conflict(b"a", b"b", b"")])
        );
        assert_eq!(
            merge(b"a", b"b", b"c"),
            MergeResult::Conflict(vec![conflict(b"a", b"b", b"c")])
        );
    }
}
|