lib merge: the key explain_diff_of_merges function and data structure

This turns two Merge-s into an object that represents a way to present their difference to the user, in the form of `Vec<DiffExplanationAtom>`. `DiffExplanationAtom` is the key structure for the data model of differences between diffs. The algorithm is inefficient and far from perfect, for now my focus is on the data model.
2024-12-24 12:48:55 +00:00 · 2024-08-05 15:05:42 -07:00 · 2024-08-05 15:05:42 -07:00 · d987a3e9f3
commit d987a3e9f3
parent 38eaa7f39f
1 changed files with 340 additions and 0 deletions
--- a/lib/src/merge.rs
+++ b/lib/src/merge.rs
@ -209,6 +209,11 @@ impl<T> Merge<T> {
        self.values.get(index * 2 + 1)
    }

+    /// Returns the `index`-th removed value mutably
+    pub fn get_remove_mut(&mut self, index: usize) -> Option<&mut T> {
+        self.values.get_mut(index * 2 + 1)
+    }
+
    /// Returns the `index`-th added value, which is considered belonging to the
    /// `index-1`-th diff pair. The zeroth add is a diff from the non-existent
    /// state.
@ -216,6 +221,11 @@ impl<T> Merge<T> {
        self.values.get(index * 2)
    }

+    /// Returns the `index`-th added value mutably
+    pub fn get_add_mut(&mut self, index: usize) -> Option<&mut T> {
+        self.values.get_mut(index * 2)
+    }
+
    /// Removes the specified "removed"/"added" values. The removed slots are
    /// replaced by the last "removed"/"added" values.
    pub fn swap_remove(&mut self, remove_index: usize, add_index: usize) -> (T, T) {
@ -768,6 +778,16 @@ impl<T> DiffOfMerges<T> {
        DiffOfMerges { values }
    }

+    /// Diff of the `left` conflict ("from" side) and the `right` conflict
+    /// ("to") side
+    pub fn diff_of(left: Merge<T>, right: Merge<T>) -> DiffOfMerges<T> {
+        // The last element of right's vector is an add and should stay an add.
+        // The first element of left's vector should become a remove.
+        let mut result_vec = right.values.into_vec();
+        result_vec.append(&mut left.values.into_vec());
+        DiffOfMerges::from_vec(result_vec)
+    }
+
    /// Simplify the diff by removing equal positive and negative terms.
    ///
    /// The positive terms are allowed to be reordered freely, as are the
@ -780,6 +800,17 @@ impl<T> DiffOfMerges<T> {
        self
    }

+    /// Pairs of diffs presented as (positive term, negative term)
+    ///
+    /// Note that operations such as simplifications do *not* preserve these
+    /// pairs.
+    pub fn pairs(&self) -> impl ExactSizeIterator<Item = (&T, &T)> {
+        zip(
+            self.values.iter().step_by(2),
+            self.values.iter().skip(1).step_by(2),
+        )
+    }
+
    /// Rearranges the removes so that every pair of add and remove has as small
    /// a distance as possible.
    ///
@ -798,6 +829,198 @@ impl<T> DiffOfMerges<T> {
    }
 }

+/// Given two conflicts, compute the corresponding set of
+/// [`DiffExplanationAtom<T>`].
+///
+/// The diffs in the set are picked to minimize `distance`.
+///
+/// This returns a Vec, but the order of the resulting set is not significant.
+//
+// TODO(ilyagr): There is a question of whether we want to bias the diffs
+// towards the diffs that match the way the conflicts are presented to the user,
+// or to bias to the diffs that come from the rebased commits after a rebase.
+// Note that this can have implication on how `blame` works. I currently think
+// that we should try to improve the optimization algorithm first and see how
+// far that gets us.
+pub fn explain_diff_of_merges<T: PartialEq + Clone>(
+    left: Merge<T>,
+    right: Merge<T>,
+    distance: impl Fn(&T, &T) -> usize,
+) -> Vec<DiffExplanationAtom<T>> {
+    let left = left.simplify();
+    let right = right.simplify();
+    let optimized_diff_of_merges = DiffOfMerges::diff_of(left.clone(), right.clone())
+        .simplify()
+        .rearranged_to_optimize_for_distance(distance);
+    let mut left_seen = left.map(|_| false);
+    let mut right_seen = right.map(|_| false);
+
+    let mut result = optimized_diff_of_merges
+        .pairs()
+        .map(|(add, remove)| {
+            // The order of `if`-s does not matter since the diff_of_merges is simplified as
+            // is each conflict, so the "add" could not come from both conflicts.
+            let add_comes_from_right = if let Some((ix, _)) = right
+                .adds()
+                .enumerate()
+                .find(|(ix, elt)| !right_seen.get_add(*ix).unwrap() && *elt == add)
+            {
+                *right_seen.get_add_mut(ix).unwrap() = true;
+                true
+            } else if let Some((ix, _)) = left
+                .removes()
+                .enumerate()
+                .find(|(ix, elt)| !left_seen.get_remove(*ix).unwrap() && *elt == add)
+            {
+                *left_seen.get_remove_mut(ix).unwrap() = true;
+                false
+            } else {
+                panic!(
+                    "adds of (right - left) should be either adds on the right or removes on the \
+                     left."
+                )
+            };
+
+            let remove_comes_from_right = if let Some((ix, _)) = right
+                .removes()
+                .enumerate()
+                .find(|(ix, elt)| !right_seen.get_remove(*ix).unwrap() && *elt == remove)
+            {
+                *right_seen.get_remove_mut(ix).unwrap() = true;
+                true
+            } else if let Some((ix, _)) = left
+                .adds()
+                .enumerate()
+                .find(|(ix, elt)| !left_seen.get_add(*ix).unwrap() && *elt == remove)
+            {
+                *left_seen.get_add_mut(ix).unwrap() = true;
+                false
+            } else {
+                panic!(
+                    "removes of (right - left) should be either removes on the right or adds on \
+                     the left."
+                )
+            };
+
+            // TODO(ilyagr): Consider refactoring this to have fewer unnecessary clones.
+            match (add_comes_from_right, remove_comes_from_right) {
+                (true, true) => DiffExplanationAtom::AddedConflictDiff {
+                    conflict_add: add.clone(),
+                    conflict_remove: remove.clone(),
+                },
+                (false, false) => DiffExplanationAtom::RemovedConflictDiff {
+                    /* Instead of adding an upside-down conflict, consider this a removal of the
+                     * opposite conflict */
+                    conflict_add: remove.clone(),
+                    conflict_remove: add.clone(),
+                },
+                (true, false) => DiffExplanationAtom::ChangedConflictAdd {
+                    left_version: remove.clone(),
+                    right_version: add.clone(),
+                },
+                (false, true) => DiffExplanationAtom::ChangedConflictRemove {
+                    left_version: remove.clone(),
+                    right_version: add.clone(),
+                },
+            }
+        })
+        .collect_vec();
+
+    // Since the conflicts were simplified, and any sides we didn't yet see must
+    // have cancelled out in the diff,they are present on both left and right.
+    // So, we can forget about the left side.
+
+    // TODO(ilyagr): We might be able to have more structure, e.g. have an
+    // `UnchangedConflictDiff` category if there is both an unchanged add and an
+    // unchanged remove *and* they are reasonably close. This should be considered
+    // separately for showing to the user (where it might look confusingly similar
+    // to other diffs that mean something else) and for blame/absorb.
+    result.extend(
+        zip(right.adds(), right_seen.adds())
+            .filter(|&(_elt, seen)| (!seen))
+            .map(|(elt, _seen)| DiffExplanationAtom::UnchangedConflictAdd(elt.clone())),
+    );
+    result.extend(
+        zip(right.removes(), right_seen.removes())
+            .filter(|&(_elt, seen)| (!seen))
+            .map(|(elt, _seen)| DiffExplanationAtom::UnchangedConflictRemove(elt.clone())),
+    );
+    result
+}
+
+/// A statement about a difference of two conflicts of type `Merge<T>`
+///
+/// A (conceptually unordered) set of `DiffExplanationAtom<T>` describes a
+/// conflict `left: Merge<T>` and a sequence of operations to turn it into a
+/// different conflict `right: Merge<T>`. This is designed so that this
+/// information can be presented to the user.
+///
+/// This description is far from unique. We usually pick one by trying to
+/// minimize the complexity of the diffs in those operations that involve diffs.
+#[derive(PartialEq, Eq, Clone, Debug)]
+pub enum DiffExplanationAtom<T> {
+    /// Adds one more pair of an add + a remove to the conflict, making it more
+    /// complicated (more sides than before)
+    AddedConflictDiff {
+        /// Add of the added diff.
+        ///
+        /// This is an "add" of the right conflict that is not an "add" of the
+        /// left conflict.
+        conflict_add: T,
+        /// Remove of the added diff
+        ///
+        /// This is a "remove" of the right conflict that is not a "remove" of
+        /// the left conflict.
+        conflict_remove: T,
+    },
+    /// Removes one of the add + remove pairs in the conflict, making it less
+    /// complicated (fewer sides than before)
+    ///
+    /// In terms of conflict theory, this is equivalent to adding the opposite
+    /// diff and then simplifying the conflict. However, for the user, the
+    /// difference between these presentations is significant.
+    RemovedConflictDiff {
+        /// Add of the removed diff
+        ///
+        /// This is an "add" of the left conflict that is not an "add" of the
+        /// right conflict.
+        conflict_add: T,
+        /// Remove of the removed diff
+        ///
+        /// This is a "remove" of the left conflict that is not a "remove" of
+        /// the right conflict.
+        conflict_remove: T,
+    },
+    /// Modifies one of the adds of the conflict
+    ///
+    /// This does not change the number of sides in the conflict.
+    ChangedConflictAdd {
+        /// Add of the left conflict
+        left_version: T,
+        /// Add of the right conflict
+        right_version: T,
+    },
+    /// Modifies one of the removes of the conflict
+    ///
+    /// This does not change the number of sides in the conflict.
+    // TODO(ilyagr): While this operation is very natural from the perspective of conflict
+    // theory, I find it hard to come up with an example where it is an
+    // intuitive result of some operation. If it's too unintuitive to users, we could try to
+    // replace it with a composition of "removed diff" and "added diff".
+    ChangedConflictRemove {
+        /// Remove of the left conflict
+        left_version: T,
+        /// Remove of the right conflict
+        right_version: T,
+    },
+    /// Declares that both the left and the right conflict contain this value as
+    /// an "add"
+    UnchangedConflictAdd(T),
+    /// Declares that both the left and the right conflict contain this value as
+    /// a "remove"
+    UnchangedConflictRemove(T),
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@ -1355,4 +1578,121 @@ mod tests {
            c(&[3, 2, 1, 6], &[4, 5, 0, 7, 8])
        );
    }
+
+    #[test]
+    fn test_explain_diff_of_merges() {
+        let dist = |x: &usize, y: &usize| x.abs_diff(*y);
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[], &[1]), c(&[], &[1]), dist),
+            @r#"
+        [
+            UnchangedConflictAdd(
+                1,
+            ),
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[], &[1]), c(&[], &[2]), dist),
+            @r#"
+        [
+            ChangedConflictAdd {
+                left_version: 1,
+                right_version: 2,
+            },
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            // One of the conflicts gets simplified to the same case as above
+            explain_diff_of_merges(c(&[], &[1]), c(&[1], &[1,2]), dist),
+            @r#"
+        [
+            ChangedConflictAdd {
+                left_version: 1,
+                right_version: 2,
+            },
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[], &[1]), c(&[0], &[1, 2]), dist),
+            @r#"
+        [
+            AddedConflictDiff {
+                conflict_add: 2,
+                conflict_remove: 0,
+            },
+            UnchangedConflictAdd(
+                1,
+            ),
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[0], &[1,2]), c(&[], &[1]), dist),
+            @r#"
+        [
+            RemovedConflictDiff {
+                conflict_add: 2,
+                conflict_remove: 0,
+            },
+            UnchangedConflictAdd(
+                1,
+            ),
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[0], &[1,2]), c(&[3], &[1,2]), dist),
+            @r#"
+        [
+            ChangedConflictRemove {
+                left_version: 3,
+                right_version: 0,
+            },
+            UnchangedConflictAdd(
+                1,
+            ),
+            UnchangedConflictAdd(
+                2,
+            ),
+        ]
+        "#
+        );
+        // TODO: Should the unchanged add+remove become "UnchangedConflictDiff" for
+        // nicer presentation?
+        insta::assert_debug_snapshot!(
+            explain_diff_of_merges(c(&[0], &[1,2]), c(&[0], &[1,3]), dist),
+            @r#"
+        [
+            ChangedConflictAdd {
+                left_version: 2,
+                right_version: 3,
+            },
+            UnchangedConflictAdd(
+                1,
+            ),
+            UnchangedConflictRemove(
+                0,
+            ),
+        ]
+        "#
+        );
+        insta::assert_debug_snapshot!(
+            // Simplifies
+            explain_diff_of_merges(c(&[0], &[1,2]), c(&[2], &[1,2]), dist),
+            @r#"
+        [
+            RemovedConflictDiff {
+                conflict_add: 2,
+                conflict_remove: 0,
+            },
+            UnchangedConflictAdd(
+                1,
+            ),
+        ]
+        "#
+        );
+    }
 }