conflicts: refactor conflict marker writing and parsing

These changes make the code a bit more readable, and they will make it easier to have conflict markers of different lengths in the next commit.
2025-02-06 20:42:10 +00:00 · 2024-11-24 15:20:18 -06:00 · 2024-11-24 15:20:18 -06:00 · 369e8ea057
commit 369e8ea057
parent 75ce7f6b7f
1 changed files with 171 additions and 117 deletions
--- a/lib/src/conflicts.rs
+++ b/lib/src/conflicts.rs
@ -28,8 +28,6 @@ use futures::StreamExt;
 use futures::TryStreamExt;
 use itertools::Itertools;
 use pollster::FutureExt;
 use regex::bytes::Regex;
 use regex::bytes::RegexBuilder;
 use crate::backend::BackendError;
 use crate::backend::BackendResult;
@ -51,49 +49,25 @@ use crate::merge::MergedTreeValue;
 use crate::repo_path::RepoPath;
 use crate::store::Store;
-const CONFLICT_START_LINE: &str = "<<<<<<<";
+/// Length of conflict markers.
-const CONFLICT_END_LINE: &str = ">>>>>>>";
+pub const CONFLICT_MARKER_LEN: usize = 7;
 /// Marker line written before a diff section ("Changes from ... to side #N").
 const CONFLICT_DIFF_LINE: &str = "%%%%%%%";
 /// Marker line written before a removed snapshot ("Contents of <base>").
 const CONFLICT_MINUS_LINE: &str = "-------";
 /// Marker line written before an added snapshot ("Contents of side #N").
 const CONFLICT_PLUS_LINE: &str = "+++++++";
 /// Marker line written before the base in Git-style conflicts.
 const CONFLICT_GIT_ANCESTOR_LINE: &str = "|||||||";
 /// Marker line written after the base, before the second side, in Git-style
 /// conflicts.
 const CONFLICT_GIT_SEPARATOR_LINE: &str = "=======";
 // First byte of each marker line. The parsers compare these against the
 // first byte of a line that has already been recognized as a conflict marker.
 const CONFLICT_START_LINE_CHAR: u8 = CONFLICT_START_LINE.as_bytes()[0];
 const CONFLICT_END_LINE_CHAR: u8 = CONFLICT_END_LINE.as_bytes()[0];
 const CONFLICT_DIFF_LINE_CHAR: u8 = CONFLICT_DIFF_LINE.as_bytes()[0];
 const CONFLICT_MINUS_LINE_CHAR: u8 = CONFLICT_MINUS_LINE.as_bytes()[0];
 const CONFLICT_PLUS_LINE_CHAR: u8 = CONFLICT_PLUS_LINE.as_bytes()[0];
 const CONFLICT_GIT_ANCESTOR_LINE_CHAR: u8 = CONFLICT_GIT_ANCESTOR_LINE.as_bytes()[0];
 const CONFLICT_GIT_SEPARATOR_LINE_CHAR: u8 = CONFLICT_GIT_SEPARATOR_LINE.as_bytes()[0];
 /// A conflict marker is one of the separators, optionally followed by a space
 /// and some text.
 ///
 /// A separator is exactly seven repetitions of one of `<`, `>`, `%`, `-`, `+`,
 /// `|` or `=`. The pattern is anchored at both ends, so a line that merely
 /// starts with a separator and continues with other non-space text does not
 /// match.
 // TODO: All the `{7}` could be replaced with `{7,}` to allow longer
 // separators. This could be useful to make it possible to allow conflict
 // markers inside the text of the conflicts.
 static CONFLICT_MARKER_REGEX: once_cell::sync::Lazy<Regex> = once_cell::sync::Lazy::new(|| {
    // The pattern is a fixed literal, so a failure to build it would be a
    // programming error; hence the `unwrap()` at the end of the chain.
    RegexBuilder::new(r"^(<{7}|>{7}|%{7}|\-{7}|\+{7}|\|{7}|={7})( .*)?$")
        // Multi-line mode: `^` and `$` match at line boundaries, not only at
        // the very start and end of the haystack.
        .multi_line(true)
        .build()
        .unwrap()
 });
 fn write_diff_hunks(hunks: &[DiffHunk], file: &mut dyn Write) -> io::Result<()> {
    for hunk in hunks {
        match hunk.kind {
            DiffHunkKind::Matching => {
                debug_assert!(hunk.contents.iter().all_equal());
-                for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[0].lines_with_terminator() {
                    file.write_all(b" ")?;
                    file.write_all(line)?;
                }
            }
            DiffHunkKind::Different => {
-                for line in hunk.contents[0].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[0].lines_with_terminator() {
                    file.write_all(b"-")?;
                    file.write_all(line)?;
                }
-                for line in hunk.contents[1].split_inclusive(|b| *b == b'\n') {
+                for line in hunk.contents[1].lines_with_terminator() {
                    file.write_all(b"+")?;
                    file.write_all(line)?;
                }
@ -250,6 +224,77 @@ pub enum ConflictMarkerStyle {
    Git,
 }
 /// The byte that is repeated to build a conflict marker line, both when
 /// materializing conflicts and when parsing them back.
 ///
 /// Each variant's discriminant is the ASCII byte itself, so converting a
 /// variant to its byte is a plain cast.
 #[derive(Clone, Copy, PartialEq, Eq)]
 #[repr(u8)]
 enum ConflictMarkerLineChar {
    /// `<`: opens a conflict.
    ConflictStart = b'<',
    /// `>`: closes a conflict.
    ConflictEnd = b'>',
    /// `+`: introduces the contents of a side.
    Add = b'+',
    /// `-`: introduces the contents of a base.
    Remove = b'-',
    /// `%`: introduces a diff from a base to a side.
    Diff = b'%',
    /// `|`: introduces the base in Git-style conflicts.
    GitAncestor = b'|',
    /// `=`: separates the base from the second side in Git-style conflicts.
    GitSeparator = b'=',
 }
 impl ConflictMarkerLineChar {
    /// Every marker kind, used as the lookup table for [`Self::parse_byte`].
    const ALL: [Self; 7] = [
        Self::ConflictStart,
        Self::ConflictEnd,
        Self::Add,
        Self::Remove,
        Self::Diff,
        Self::GitAncestor,
        Self::GitSeparator,
    ];
    /// Returns the ASCII byte that this marker kind is made of.
    fn to_byte(self) -> u8 {
        self as u8
    }
    /// Returns the marker kind whose byte is `byte`, or `None` if `byte` is not
    /// used by any conflict marker.
    fn parse_byte(byte: u8) -> Option<Self> {
        Self::ALL
            .iter()
            .copied()
            .find(|candidate| candidate.to_byte() == byte)
    }
 }
 /// Writes one conflict marker line to `output`.
 ///
 /// The line consists of `kind`'s byte repeated `CONFLICT_MARKER_LEN` times,
 /// followed by a space and `suffix_text` when the suffix is non-empty, and is
 /// always terminated by a newline. Any I/O error from `output` is returned.
 fn write_conflict_marker(
    output: &mut dyn Write,
    kind: ConflictMarkerLineChar,
    suffix_text: &str,
 ) -> io::Result<()> {
    let marker_bytes = vec![kind.to_byte(); CONFLICT_MARKER_LEN];
    let marker = BString::new(marker_bytes);
    match suffix_text {
        // No trailing space when there is nothing to put after the marker.
        "" => writeln!(output, "{marker}"),
        text => writeln!(output, "{marker} {text}"),
    }
 }
 /// Recognizes a conflict marker at the start of `line` and returns its kind.
 ///
 /// The line must begin with a run of exactly `CONFLICT_MARKER_LEN` copies of
 /// a marker byte, and the run must be followed either by the end of the line
 /// or by an ASCII whitespace byte. Anything else yields `None`.
 fn parse_conflict_marker(line: &[u8]) -> Option<ConflictMarkerLineChar> {
    let (&marker_byte, _) = line.split_first()?;
    let kind = ConflictMarkerLineChar::parse_byte(marker_byte)?;
    // Length of the leading run of `marker_byte`; the whole line if every byte
    // matches.
    let run_len = line
        .iter()
        .position(|&b| b != marker_byte)
        .unwrap_or(line.len());
    if run_len != CONFLICT_MARKER_LEN {
        return None;
    }
    match line.get(run_len) {
        // A byte directly after the marker must be ASCII whitespace.
        Some(following) if !following.is_ascii_whitespace() => None,
        _ => Some(kind),
    }
 }
 pub fn materialize_merge_result<T: AsRef<[u8]>>(
    single_hunk: &Merge<T>,
    conflict_marker_style: ConflictMarkerStyle,
@ -323,14 +368,22 @@ fn materialize_git_style_conflict(
    conflict_info: &str,
    output: &mut dyn Write,
 ) -> io::Result<()> {
-    writeln!(output, "{CONFLICT_START_LINE} Side #1 ({conflict_info})")?;
+    write_conflict_marker(
        output,
        ConflictMarkerLineChar::ConflictStart,
        &format!("Side #1 ({conflict_info})"),
    )?;
    output.write_all(left)?;
-    writeln!(output, "{CONFLICT_GIT_ANCESTOR_LINE} Base")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::GitAncestor, "Base")?;
    output.write_all(base)?;
    // VS Code doesn't seem to support any trailing text on the separator line
-    writeln!(output, "{CONFLICT_GIT_SEPARATOR_LINE}")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::GitSeparator, "")?;
    output.write_all(right)?;
-    writeln!(output, "{CONFLICT_END_LINE} Side #2 ({conflict_info} ends)")?;
+    write_conflict_marker(
        output,
        ConflictMarkerLineChar::ConflictEnd,
        &format!("Side #2 ({conflict_info} ends)"),
    )?;
    Ok(())
 }
@ -343,17 +396,21 @@ fn materialize_jj_style_conflict(
 ) -> io::Result<()> {
    // Write a positive snapshot (side) of a conflict
    fn write_side(add_index: usize, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
-        writeln!(
+        write_conflict_marker(
            output,
-            "{CONFLICT_PLUS_LINE} Contents of side #{}",
+            ConflictMarkerLineChar::Add,
-            add_index + 1
+            &format!("Contents of side #{}", add_index + 1),
        )?;
        output.write_all(data)
    }
    // Write a negative snapshot (base) of a conflict
    fn write_base(base_str: &str, data: &[u8], output: &mut dyn Write) -> io::Result<()> {
-        writeln!(output, "{CONFLICT_MINUS_LINE} Contents of {base_str}")?;
+        write_conflict_marker(
            output,
            ConflictMarkerLineChar::Remove,
            &format!("Contents of {base_str}"),
        )?;
        output.write_all(data)
    }
@ -364,15 +421,15 @@ fn materialize_jj_style_conflict(
        diff: &[DiffHunk],
        output: &mut dyn Write,
    ) -> io::Result<()> {
-        writeln!(
+        write_conflict_marker(
            output,
-            "{CONFLICT_DIFF_LINE} Changes from {base_str} to side #{}",
+            ConflictMarkerLineChar::Diff,
-            add_index + 1
+            &format!("Changes from {base_str} to side #{}", add_index + 1),
        )?;
        write_diff_hunks(diff, output)
    }
-    writeln!(output, "{CONFLICT_START_LINE} {conflict_info}")?;
+    write_conflict_marker(output, ConflictMarkerLineChar::ConflictStart, conflict_info)?;
    let mut add_index = 0;
    for (base_index, left) in hunk.removes().enumerate() {
        // The vast majority of conflicts one actually tries to resolve manually have 1
@ -422,7 +479,11 @@ fn materialize_jj_style_conflict(
    for (add_index, slice) in hunk.adds().enumerate().skip(add_index) {
        write_side(add_index, slice, output)?;
    }
-    writeln!(output, "{CONFLICT_END_LINE} {conflict_info} ends")?;
+    write_conflict_marker(
        output,
        ConflictMarkerLineChar::ConflictEnd,
        &format!("{conflict_info} ends"),
    )?;
    Ok(())
 }
@ -480,25 +541,28 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
    let mut resolved_start = 0;
    let mut conflict_start = None;
    let mut conflict_start_len = 0;
-    for line in input.split_inclusive(|b| *b == b'\n') {
+    for line in input.lines_with_terminator() {
-        if is_conflict_marker_line(line) {
+        match parse_conflict_marker(line) {
-            if line[0] == CONFLICT_START_LINE_CHAR {
+            Some(ConflictMarkerLineChar::ConflictStart) => {
                conflict_start = Some(pos);
                conflict_start_len = line.len();
-            } else if conflict_start.is_some() && line[0] == CONFLICT_END_LINE_CHAR {
+            }
-                let conflict_body = &input[conflict_start.unwrap() + conflict_start_len..pos];
+            Some(ConflictMarkerLineChar::ConflictEnd) => {
                if let Some(conflict_start_index) = conflict_start.take() {
                    let conflict_body = &input[conflict_start_index + conflict_start_len..pos];
                    let hunk = parse_conflict_hunk(conflict_body);
                    if hunk.num_sides() == num_sides {
-                    let resolved_slice = &input[resolved_start..conflict_start.unwrap()];
+                        let resolved_slice = &input[resolved_start..conflict_start_index];
                        if !resolved_slice.is_empty() {
                            hunks.push(Merge::resolved(BString::from(resolved_slice)));
                        }
                        hunks.push(hunk);
                        resolved_start = pos + line.len();
                    }
                conflict_start = None;
                }
            }
            _ => {}
        }
        pos += line.len();
    }
@ -519,20 +583,21 @@ pub fn parse_conflict(input: &[u8], num_sides: usize) -> Option<Vec<Merge<BStrin
 /// line of the hunk.
 fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
    // If the hunk starts with a conflict marker, find its first character
-    let initial_conflict_marker_char = input
+    let initial_conflict_marker = input
        .lines_with_terminator()
        .next()
-        .filter(|line| is_conflict_marker_line(line))
+        .and_then(parse_conflict_marker);
        .map(|line| line[0]);
-    match initial_conflict_marker_char {
+    match initial_conflict_marker {
        // JJ-style conflicts must start with one of these 3 conflict marker lines
-        Some(CONFLICT_DIFF_LINE_CHAR | CONFLICT_MINUS_LINE_CHAR | CONFLICT_PLUS_LINE_CHAR) => {
+        Some(
-            parse_jj_style_conflict_hunk(input)
+            ConflictMarkerLineChar::Diff
-        }
+            | ConflictMarkerLineChar::Remove
            | ConflictMarkerLineChar::Add,
        ) => parse_jj_style_conflict_hunk(input),
        // Git-style conflicts either must not start with a conflict marker line, or must start with
        // the "|||||||" conflict marker line (if the first side was empty)
-        None | Some(CONFLICT_GIT_ANCESTOR_LINE_CHAR) => parse_git_style_conflict_hunk(input),
+        None | Some(ConflictMarkerLineChar::GitAncestor) => parse_git_style_conflict_hunk(input),
        // No other conflict markers are allowed at the start of a hunk
        Some(_) => Merge::resolved(BString::new(vec![])),
    }
@ -541,35 +606,33 @@ fn parse_conflict_hunk(input: &[u8]) -> Merge<BString> {
 fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    enum State {
        Diff,
-        Minus,
+        Remove,
-        Plus,
+        Add,
        Unknown,
    }
    let mut state = State::Unknown;
    let mut removes = vec![];
    let mut adds = vec![];
    for line in input.lines_with_terminator() {
-        if is_conflict_marker_line(line) {
+        match parse_conflict_marker(line) {
-            match line[0] {
+            Some(ConflictMarkerLineChar::Diff) => {
                CONFLICT_DIFF_LINE_CHAR => {
                state = State::Diff;
                removes.push(BString::new(vec![]));
                adds.push(BString::new(vec![]));
                continue;
            }
-                CONFLICT_MINUS_LINE_CHAR => {
+            Some(ConflictMarkerLineChar::Remove) => {
-                    state = State::Minus;
+                state = State::Remove;
                removes.push(BString::new(vec![]));
                continue;
            }
-                CONFLICT_PLUS_LINE_CHAR => {
+            Some(ConflictMarkerLineChar::Add) => {
-                    state = State::Plus;
+                state = State::Add;
                adds.push(BString::new(vec![]));
                continue;
            }
            _ => {}
        }
        }
        match state {
            State::Diff => {
                if let Some(rest) = line.strip_prefix(b"-") {
@ -590,10 +653,10 @@ fn parse_jj_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
                    return Merge::resolved(BString::new(vec![]));
                }
            }
-            State::Minus => {
+            State::Remove => {
                removes.last_mut().unwrap().extend_from_slice(line);
            }
-            State::Plus => {
+            State::Add => {
                adds.last_mut().unwrap().extend_from_slice(line);
            }
            State::Unknown => {
@ -623,9 +686,8 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    let mut base = BString::new(vec![]);
    let mut right = BString::new(vec![]);
    for line in input.lines_with_terminator() {
-        if is_conflict_marker_line(line) {
+        match parse_conflict_marker(line) {
-            match line[0] {
+            Some(ConflictMarkerLineChar::GitAncestor) => {
                CONFLICT_GIT_ANCESTOR_LINE_CHAR => {
                if state == State::Left {
                    state = State::Base;
                    continue;
@ -634,7 +696,7 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
                    return Merge::resolved(BString::new(vec![]));
                }
            }
-                CONFLICT_GIT_SEPARATOR_LINE_CHAR => {
+            Some(ConflictMarkerLineChar::GitSeparator) => {
                if state == State::Base {
                    state = State::Right;
                    continue;
@ -645,7 +707,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
            }
            _ => {}
        }
        }
        match state {
            State::Left => left.extend_from_slice(line),
            State::Base => base.extend_from_slice(line),
@ -661,13 +722,6 @@ fn parse_git_style_conflict_hunk(input: &[u8]) -> Merge<BString> {
    }
 }
 /// Reports whether `line` is a conflict marker line.
 ///
 /// Trailing ASCII whitespace is stripped before the regex is applied, so that
 /// lines with CRLF endings are recognized as well.
 fn is_conflict_marker_line(line: &[u8]) -> bool {
    let is_trailing_space = |ch: char| ch.is_ascii_whitespace();
    let trimmed = line.trim_end_with(is_trailing_space);
    CONFLICT_MARKER_REGEX.is_match_at(trimmed, 0)
 }
 /// Parses conflict markers in `content` and returns an updated version of
 /// `file_ids` with the new contents. If no (valid) conflict markers remain, a
 /// single resolved `FileId` will be returned.