diff: extract helpers that process word-level diffs without interleaving

These functions will be reused by the non-inline version of color-words diffs.
2024-08-14 17:47:11 +09:00 · 2024-08-14 17:47:11 +09:00 · 6b2e191b2b
commit 6b2e191b2b
parent decd913cea
1 changed files with 32 additions and 19 deletions
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use std::borrow::Borrow;
 use std::cmp::max;
 use std::collections::HashSet;
 use std::ops::Range;
@ -1061,8 +1062,8 @@ fn unified_diff_hunks<'content>(
                current_hunk.extend_context_lines(before_lines.into_iter().rev());
            }
            DiffHunk::Different(contents) => {
-                let [left, right] = contents.try_into().unwrap();
-                let (left_lines, right_lines) = inline_diff_hunks(left, right);
+                let (left_lines, right_lines) =
+                    unzip_diff_hunks_to_lines(Diff::by_word(contents).hunks());
                current_hunk.extend_removed_lines(left_lines);
                current_hunk.extend_added_lines(right_lines);
            }
@ -1074,19 +1075,21 @@ fn unified_diff_hunks<'content>(
    hunks
 }

-/// Splits line-level hunks into word-level tokens. Returns lists of tokens per
-/// line.
-fn inline_diff_hunks<'content>(
-    left_content: &'content [u8],
-    right_content: &'content [u8],
-) -> (Vec<DiffTokenVec<'content>>, Vec<DiffTokenVec<'content>>) {
+/// Splits `(left, right)` hunk pairs into `(left_lines, right_lines)`.
+fn unzip_diff_hunks_to_lines<'content, I>(
+    diff_hunks: I,
+) -> (Vec<DiffTokenVec<'content>>, Vec<DiffTokenVec<'content>>)
+where
+    I: IntoIterator,
+    I::Item: Borrow<DiffHunk<'content>>,
+{
    let mut left_lines: Vec<DiffTokenVec<'content>> = vec![];
    let mut right_lines: Vec<DiffTokenVec<'content>> = vec![];
    let mut left_tokens: DiffTokenVec<'content> = vec![];
    let mut right_tokens: DiffTokenVec<'content> = vec![];

-    for hunk in Diff::by_word([left_content, right_content]).hunks() {
-        match hunk {
+    for hunk in diff_hunks {
+        match hunk.borrow() {
            DiffHunk::Matching(content) => {
                for token in content.split_inclusive(|b| *b == b'\n') {
                    left_tokens.push((DiffTokenType::Matching, token));
@ -1098,7 +1101,9 @@ fn inline_diff_hunks<'content>(
                }
            }
            DiffHunk::Different(contents) => {
-                let [left, right] = contents.try_into().unwrap();
+                let [left, right] = contents[..]
+                    .try_into()
+                    .expect("hunk should have exactly two inputs");
                for token in left.split_inclusive(|b| *b == b'\n') {
                    left_tokens.push((DiffTokenType::Different, token));
                    if token.ends_with(b"\n") {
@ -1147,14 +1152,7 @@ fn show_unified_diff_hunks(
            };
            formatter.with_label(label, |formatter| {
                write!(formatter, "{sigil}")?;
-                for (token_type, content) in tokens {
-                    match token_type {
-                        DiffTokenType::Matching => formatter.write_all(content)?,
-                        DiffTokenType::Different => formatter
-                            .with_label("token", |formatter| formatter.write_all(content))?,
-                    }
-                }
-                io::Result::Ok(())
+                show_diff_line_tokens(formatter, tokens)
            })?;
            let (_, content) = tokens.last().expect("hunk line must not be empty");
            if !content.ends_with(b"\n") {
@ -1165,6 +1163,21 @@ fn show_unified_diff_hunks(
    Ok(())
 }

+fn show_diff_line_tokens(
+    formatter: &mut dyn Formatter,
+    tokens: &[(DiffTokenType, &[u8])],
+) -> io::Result<()> {
+    for (token_type, content) in tokens {
+        match token_type {
+            DiffTokenType::Matching => formatter.write_all(content)?,
+            DiffTokenType::Different => {
+                formatter.with_label("token", |formatter| formatter.write_all(content))?
+            }
+        }
+    }
+    Ok(())
+}
+
 pub fn show_git_diff(
    formatter: &mut dyn Formatter,
    store: &Store,