conflicts: refactor conflict marker writing and parsing

These changes make the code a bit more readable, and they will make it
easier to have conflict markers of different lengths in the next commit.
This commit is contained in:
Scott Taylor 2024-11-24 15:20:18 -06:00 committed by Scott Taylor
parent 75ce7f6b7f
commit 369e8ea057

View file

@ -28,8 +28,6 @@ use futures::StreamExt;
use futures::TryStreamExt; use futures::TryStreamExt;
use itertools::Itertools; use itertools::Itertools;
use pollster::FutureExt; use pollster::FutureExt;
use regex::bytes::Regex;
use regex::bytes::RegexBuilder;
use crate::backend::BackendError; use crate::backend::BackendError;
use crate::backend::BackendResult; use crate::backend::BackendResult;
@ -51,49 +49,25 @@ use crate::merge::MergedTreeValue;
use crate::repo_path::RepoPath; use crate::repo_path::RepoPath;
use crate::store::Store; use crate::store::Store;
const CONFLICT_START_LINE: &str = "<<<<<<<"; /// Length of conflict markers.
const CONFLICT_END_LINE: &str = ">>>>>>>"; pub const CONFLICT_MARKER_LEN: usize = 7;
const CONFLICT_DIFF_LINE: &str = "%%%%%%%";
const CONFLICT_MINUS_LINE: &str = "-------";
const CONFLICT_PLUS_LINE: &str = "+++++++";
const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||";
const CONFLICT_GIT_SEPARATOR_LINE: &str = "=======";
const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0];
const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0];
const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0];
const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];
/// Matches a conflict marker line: exactly seven repetitions of one of the
/// separator characters (`<`, `>`, `%`, `-`, `+`, `|`, `=`), optionally followed
/// by a space and some text.
///
/// The regex is wrapped in a `Lazy`, so it is built on first access rather than
/// at program start.
// TODO: All the `{7}` could be replaced with `{7,}` to allow longer
// separators. This could be useful to make it possible to allow conflict
// markers inside the text of the conflicts.
static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
// Multi-line mode: `^` and `$` match at line boundaries, not only at the
// start and end of the whole input.
.multi_line(true)
.build()
// The pattern is a fixed literal, so a build failure here would be a
// programming error rather than a runtime condition.
.unwrap()
});
fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> { fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
for hunk in hunks { for hunk in hunks {
match hunk.kind { match hunk.kind {
DiffHunkKind::Matching => { DiffHunkKind::Matching => {
debug_assert!(hunk.contents.iter().all_equal()); debug_assert!(hunk.contents.iter().all_equal());
for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { for line in hunk.contents[0].lines_with_terminator() {
file.write_all(b" ")?; file.write_all(b" ")?;
file.write_all(line)?; file.write_all(line)?;
} }
} }
DiffHunkKind::Different => { DiffHunkKind::Different => {
for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') { for line in hunk.contents[0].lines_with_terminator() {
file.write_all(b"-")?; file.write_all(b"-")?;
file.write_all(line)?; file.write_all(line)?;
} }
for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') { for line in hunk.contents[1].lines_with_terminator() {
file.write_all(b"+")?; file.write_all(b"+")?;
file.write_all(line)?; file.write_all(line)?;
} }
@ -250,6 +224,77 @@ pub enum ConflictMarkerStyle {
Git, Git,
} }
/// The set of characters that, when repeated, form a conflict marker line.
/// Used both when materializing conflicts and when parsing them back.
///
/// Each variant's discriminant is the ASCII byte that is repeated on the
/// marker line.
#[derive(Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
enum ConflictMarkerLineChar {
    ConflictStart = b'<',
    ConflictEnd = b'>',
    Add = b'+',
    Remove = b'-',
    Diff = b'%',
    GitAncestor = b'|',
    GitSeparator = b'=',
}

impl ConflictMarkerLineChar {
    /// Every marker kind, used as the lookup table for [`Self::parse_byte`].
    const ALL: [Self; 7] = [
        Self::ConflictStart,
        Self::ConflictEnd,
        Self::Add,
        Self::Remove,
        Self::Diff,
        Self::GitAncestor,
        Self::GitSeparator,
    ];

    /// Returns the ASCII byte that is repeated to form this marker.
    fn to_byte(self) -> u8 {
        // The enum is `#[repr(u8)]` and each discriminant is the marker byte.
        self as u8
    }

    /// Returns the marker kind whose byte equals `byte`, or `None` if `byte`
    /// is not a conflict marker character.
    fn parse_byte(byte: u8) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.to_byte() == byte)
    }
}
/// Writes one conflict marker line to `output`.
///
/// The line consists of the byte for `kind` repeated `CONFLICT_MARKER_LEN`
/// times. When `suffix_text` is non-empty it is appended after a single space.
/// The line is always terminated with a newline.
fn write_conflict_marker(
    output: &mut dyn Write,
    kind: ConflictMarkerLineChar,
    suffix_text: &str,
) -> io::Result<()> {
    let marker_bytes = vec![kind.to_byte(); CONFLICT_MARKER_LEN];
    let marker = BString::new(marker_bytes);
    if !suffix_text.is_empty() {
        return writeln!(output, "{marker} {suffix_text}");
    }
    // No suffix: emit the bare marker so no trailing space is written.
    writeln!(output, "{marker}")
}
/// Tries to interpret `line` as a conflict marker line and returns its kind.
///
/// A line is accepted when it starts with a run of exactly
/// `CONFLICT_MARKER_LEN` identical conflict marker bytes, and the run is
/// followed either by nothing or by an ASCII whitespace byte. Anything else
/// yields `None`.
fn parse_conflict_marker(line: &[u8]) -> Option<ConflictMarkerLineChar> {
    let (&marker_byte, rest) = line.split_first()?;
    let kind = ConflictMarkerLineChar::parse_byte(marker_byte)?;

    // Length of the leading run of `marker_byte`, including the first byte.
    let run_len = 1 + rest.iter().take_while(|&&b| b == marker_byte).count();
    if run_len != CONFLICT_MARKER_LEN {
        return None;
    }

    match line.get(run_len) {
        // A byte directly after the marker must be ASCII whitespace
        Some(following) if !following.is_ascii_whitespace() => None,
        _ => Some(kind),
    }
}
pub fn materialize_merge_result<T: AsRef<[u8]>>( pub fn materialize_merge_result<T: AsRef<[u8]>>(
single_hunk: &Merge<T>, single_hunk: &Merge<T>,
conflict_marker_style: ConflictMarkerStyle, conflict_marker_style: ConflictMarkerStyle,
@ -323,14 +368,22 @@ fn materialize_git_style_conflict(
conflict_info: &str, conflict_info: &str,
output: &mut dyn Write, output: &mut dyn Write,
) -> io::Result<()> { ) -> io::Result<()> {
writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?; write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictStart,
&format!("Side #1 ({conflict_info})"),
)?;
output.write_all(left)?; output.write_all(left)?;
writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?; write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?;
output.write_all(base)?; output.write_all(base)?;
// VS Code doesn't seem to support any trailing text on the separator line // VS Code doesn't seem to support any trailing text on the separator line
writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?; write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?;
output.write_all(right)?; output.write_all(right)?;
writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?; write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictEnd,
&format!("Side #2 ({conflict_info} ends)"),
)?;
Ok(()) Ok(())
} }
@ -343,17 +396,21 @@ fn materialize_jj_style_conflict(
) -> io::Result<()> { ) -> io::Result<()> {
// Write a positive snapshot (side) of a conflict // Write a positive snapshot (side) of a conflict
fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> { fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
writeln!( write_conflict_marker(
output, output,
"{CONFLICT_PLUS_LINE} Contents of side #{}", ConflictMarkerLineChar::Add,
add_index + 1 &format!("Contents of side #{}", add_index + 1),
)?; )?;
output.write_all(data) output.write_all(data)
} }
// Write a negative snapshot (base) of a conflict // Write a negative snapshot (base) of a conflict
fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> { fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?; write_conflict_marker(
output,
ConflictMarkerLineChar::Remove,
&format!("Contents of {base_str}"),
)?;
output.write_all(data) output.write_all(data)
} }
@ -364,15 +421,15 @@ fn materialize_jj_style_conflict(
diff: &[DiffHunk], diff: &[DiffHunk],
output: &mut dyn Write, output: &mut dyn Write,
) -> io::Result<()> { ) -> io::Result<()> {
writeln!( write_conflict_marker(
output, output,
"{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}", ConflictMarkerLineChar::Diff,
add_index + 1 &format!("Changes from {base_str} to side #{}", add_index + 1),
)?; )?;
write_diff_hunks(diff, output) write_diff_hunks(diff, output)
} }
writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?; write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?;
let mut add_index = 0; let mut add_index = 0;
for (base_index, left) in hunk.removes().enumerate() { for (base_index, left) in hunk.removes().enumerate() {
// The vast majority of conflicts one actually tries to resolve manually have 1 // The vast majority of conflicts one actually tries to resolve manually have 1
@ -422,7 +479,11 @@ fn materialize_jj_style_conflict(
for (add_index, slice) in hunk.adds().enumerate().skip(add_index) { for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
write_side(add_index, slice, output)?; write_side(add_index, slice, output)?;
} }
writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?; write_conflict_marker(
output,
ConflictMarkerLineChar::ConflictEnd,
&format!("{conflict_info} ends"),
)?;
Ok(()) Ok(())
} }
@ -480,25 +541,28 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
let mut resolved_start = 0; let mut resolved_start = 0;
let mut conflict_start = None; let mut conflict_start = None;
let mut conflict_start_len = 0; let mut conflict_start_len = 0;
for line in input.split_inclusive(|b| *b == b'\n') { for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) { match parse_conflict_marker(line) {
if line[0] == CONFLICT_START_LINE_CHAR { Some(ConflictMarkerLineChar::ConflictStart) => {
conflict_start = Some(pos); conflict_start = Some(pos);
conflict_start_len = line.len(); conflict_start_len = line.len();
} else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR { }
let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos]; Some(ConflictMarkerLineChar::ConflictEnd) => {
if let Some(conflict_start_index) = conflict_start.take() {
let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
let hunk = parse_conflict_hunk(conflict_body); let hunk = parse_conflict_hunk(conflict_body);
if hunk.num_sides() == num_sides { if hunk.num_sides() == num_sides {
let resolved_slice = &input[resolved_start..conflict_start.unwrap()]; let resolved_slice = &input[resolved_start..conflict_start_index];
if !resolved_slice.is_empty() { if !resolved_slice.is_empty() {
hunks.push(Merge::resolved(BString::from(resolved_slice))); hunks.push(Merge::resolved(BString::from(resolved_slice)));
} }
hunks.push(hunk); hunks.push(hunk);
resolved_start = pos + line.len(); resolved_start = pos + line.len();
} }
conflict_start = None;
} }
} }
_ => {}
}
pos += line.len(); pos += line.len();
} }
@ -519,20 +583,21 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
/// line of the hunk. /// line of the hunk.
fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> { fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
// If the hunk starts with a conflict marker, find its first character // If the hunk starts with a conflict marker, find its first character
let initial_conflict_marker_char = input let initial_conflict_marker = input
.lines_with_terminator() .lines_with_terminator()
.next() .next()
.filter(|line| is_conflict_marker_line(line)) .and_then(parse_conflict_marker);
.map(|line| line[0]);
match initial_conflict_marker_char { match initial_conflict_marker {
// JJ-style conflicts must start with one of these 3 conflict marker lines // JJ-style conflicts must start with one of these 3 conflict marker lines
Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => { Some(
parse_jj_style_conflict_hunk(input) ConflictMarkerLineChar::Diff
} | ConflictMarkerLineChar::Remove
| ConflictMarkerLineChar::Add,
) => parse_jj_style_conflict_hunk(input),
// Git-style conflicts either must not start with a conflict marker line, or must start with // Git-style conflicts either must not start with a conflict marker line, or must start with
// the "|||||||" conflict marker line (if the first side was empty) // the "|||||||" conflict marker line (if the first side was empty)
None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input), None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input),
// No other conflict markers are allowed at the start of a hunk // No other conflict markers are allowed at the start of a hunk
Some(_) => Merge::resolved(BString::new(vec![])), Some(_) => Merge::resolved(BString::new(vec![])),
} }
@ -541,35 +606,33 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> { fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
enum State { enum State {
Diff, Diff,
Minus, Remove,
Plus, Add,
Unknown, Unknown,
} }
let mut state = State::Unknown; let mut state = State::Unknown;
let mut removes = vec![]; let mut removes = vec![];
let mut adds = vec![]; let mut adds = vec![];
for line in input.lines_with_terminator() { for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) { match parse_conflict_marker(line) {
match line[0] { Some(ConflictMarkerLineChar::Diff) => {
CONFLICT_DIFF_LINE_CHAR => {
state = State::Diff; state = State::Diff;
removes.push(BString::new(vec![])); removes.push(BString::new(vec![]));
adds.push(BString::new(vec![])); adds.push(BString::new(vec![]));
continue; continue;
} }
CONFLICT_MINUS_LINE_CHAR => { Some(ConflictMarkerLineChar::Remove) => {
state = State::Minus; state = State::Remove;
removes.push(BString::new(vec![])); removes.push(BString::new(vec![]));
continue; continue;
} }
CONFLICT_PLUS_LINE_CHAR => { Some(ConflictMarkerLineChar::Add) => {
state = State::Plus; state = State::Add;
adds.push(BString::new(vec![])); adds.push(BString::new(vec![]));
continue; continue;
} }
_ => {} _ => {}
} }
}
match state { match state {
State::Diff => { State::Diff => {
if let Some(rest) = line.strip_prefix(b"-") { if let Some(rest) = line.strip_prefix(b"-") {
@ -590,10 +653,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
return Merge::resolved(BString::new(vec![])); return Merge::resolved(BString::new(vec![]));
} }
} }
State::Minus => { State::Remove => {
removes.last_mut().unwrap().extend_from_slice(line); removes.last_mut().unwrap().extend_from_slice(line);
} }
State::Plus => { State::Add => {
adds.last_mut().unwrap().extend_from_slice(line); adds.last_mut().unwrap().extend_from_slice(line);
} }
State::Unknown => { State::Unknown => {
@ -623,9 +686,8 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
let mut base = BString::new(vec![]); let mut base = BString::new(vec![]);
let mut right = BString::new(vec![]); let mut right = BString::new(vec![]);
for line in input.lines_with_terminator() { for line in input.lines_with_terminator() {
if is_conflict_marker_line(line) { match parse_conflict_marker(line) {
match line[0] { Some(ConflictMarkerLineChar::GitAncestor) => {
CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
if state == State::Left { if state == State::Left {
state = State::Base; state = State::Base;
continue; continue;
@ -634,7 +696,7 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
return Merge::resolved(BString::new(vec![])); return Merge::resolved(BString::new(vec![]));
} }
} }
CONFLICT_GIT_SEPARATOR_LINE_CHAR => { Some(ConflictMarkerLineChar::GitSeparator) => {
if state == State::Base { if state == State::Base {
state = State::Right; state = State::Right;
continue; continue;
@ -645,7 +707,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
} }
_ => {} _ => {}
} }
}
match state { match state {
State::Left => left.extend_from_slice(line), State::Left => left.extend_from_slice(line),
State::Base => base.extend_from_slice(line), State::Base => base.extend_from_slice(line),
@ -661,13 +722,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
} }
} }
/// Returns `true` if `line` is a conflict marker line. Trailing ASCII
/// whitespace is stripped before the regex is applied, so lines with CRLF
/// endings are recognized as well.
fn is_conflict_marker_line(line: &[u8]) -> bool {
    let without_line_ending = line.trim_end_with(|c| c.is_ascii_whitespace());
    CONFLICT_MARKER_REGEX.is_match_at(without_line_ending, 0)
}
/// Parses conflict markers in `content` and returns an updated version of /// Parses conflict markers in `content` and returns an updated version of
/// `file_ids` with the new contents. If no (valid) conflict markers remain, a /// `file_ids` with the new contents. If no (valid) conflict markers remain, a
/// single resolves `FileId` will be returned. /// single resolves `FileId` will be returned.