From e5e85e781faaabe4a56683ee454e53b7080efcbc Mon Sep 17 00:00:00 2001
From: Yuya Nishihara <yuya@tcha.org>
Date: Wed, 9 Oct 2024 17:16:53 +0900
Subject: [PATCH] diff: inline contents and ranges vecs up to two sides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This appears to be a bit faster if there are tons of unchanged ranges.

```
group                             new                     old
-----                             ---                     ---
bench_diff_git_git_read_tree_c    1.00     58.5±0.12µs    1.07     62.7±0.60µs
bench_diff_lines/modified/10k     1.00     34.2±0.72ms    1.08     37.0±1.09ms
bench_diff_lines/modified/1k      1.00      3.1±0.08ms    1.12      3.5±0.01ms
bench_diff_lines/reversed/10k     1.00     28.0±0.15ms    1.01     28.4±0.51ms
bench_diff_lines/reversed/1k      1.00   616.0±16.20µs    1.00    617.0±9.29µs
bench_diff_lines/unchanged/10k    1.00      3.5±0.04ms    1.10      3.9±0.06ms
bench_diff_lines/unchanged/1k     1.00    328.4±4.44µs    1.07    352.0±1.41µs
```
---
 cli/src/diff_util.rs |  7 ++++---
 lib/src/diff.rs      | 24 +++++++++++++++---------
 2 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/cli/src/diff_util.rs b/cli/src/diff_util.rs
index de7035146..7c6b4b46f 100644
--- a/cli/src/diff_util.rs
+++ b/cli/src/diff_util.rs
@@ -45,6 +45,7 @@ use jj_lib::diff::CompareBytesIgnoreAllWhitespace;
 use jj_lib::diff::CompareBytesIgnoreWhitespaceAmount;
 use jj_lib::diff::Diff;
 use jj_lib::diff::DiffHunk;
+use jj_lib::diff::DiffHunkContentVec;
 use jj_lib::diff::DiffHunkKind;
 use jj_lib::files::DiffLineHunkSide;
 use jj_lib::files::DiffLineIterator;
@@ -582,7 +583,7 @@ fn show_color_words_diff_hunks(
 /// Prints `num_after` lines, ellipsis, and `num_before` lines.
 fn show_color_words_context_lines(
     formatter: &mut dyn Formatter,
-    contexts: &[Vec<&BStr>],
+    contexts: &[DiffHunkContentVec],
     mut line_number: DiffLineNumber,
     options: &ColorWordsDiffOptions,
     num_after: usize,
@@ -1288,7 +1289,7 @@ fn unified_diff_hunks<'content>(
                 // Just use the right (i.e. new) content. We could count the
                 // number of skipped lines separately, but the number of the
                 // context lines should match the displayed content.
-                let [_, right] = hunk.contents.try_into().unwrap();
+                let [_, right] = hunk.contents[..].try_into().unwrap();
                 let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
                 if !current_hunk.lines.is_empty() {
                     // The previous hunk line should be either removed/added.
@@ -1604,7 +1605,7 @@ fn get_diff_stat(
         match hunk.kind {
             DiffHunkKind::Matching => {}
             DiffHunkKind::Different => {
-                let [left, right] = hunk.contents.try_into().unwrap();
+                let [left, right] = hunk.contents[..].try_into().unwrap();
                 removed += left.split_inclusive(|b| *b == b'\n').count();
                 added += right.split_inclusive(|b| *b == b'\n').count();
             }
diff --git a/lib/src/diff.rs b/lib/src/diff.rs
index 18114c943..4903aa2ec 100644
--- a/lib/src/diff.rs
+++ b/lib/src/diff.rs
@@ -26,6 +26,8 @@ use std::slice;
 use bstr::BStr;
 use hashbrown::HashTable;
 use itertools::Itertools;
+use smallvec::smallvec;
+use smallvec::SmallVec;
 
 pub fn find_line_ranges(text: &[u8]) -> Vec<Range<usize>> {
     text.split_inclusive(|b| *b == b'\n')
@@ -508,8 +510,9 @@ fn intersect_unchanged_words(
 
 #[derive(Clone, PartialEq, Eq, Debug)]
 struct UnchangedRange {
+    // Inline up to two sides (base + one other)
     base: Range<usize>,
-    others: Vec<Range<usize>>,
+    others: SmallVec<[Range<usize>; 1]>,
 }
 
 impl UnchangedRange {
@@ -538,7 +541,7 @@ impl UnchangedRange {
 #[derive(Clone, Debug)]
 pub struct Diff<'input> {
     base_input: &'input BStr,
-    other_inputs: Vec<&'input BStr>,
+    other_inputs: SmallVec<[&'input BStr; 1]>,
     /// Sorted list of ranges of unchanged regions in bytes.
     ///
     /// The list should never be empty. The first and the last region may be
@@ -554,7 +557,7 @@ impl<'input> Diff<'input> {
     ) -> Self {
         let mut inputs = inputs.into_iter().map(BStr::new);
         let base_input = inputs.next().expect("inputs must not be empty");
-        let other_inputs = inputs.collect_vec();
+        let other_inputs: SmallVec<[&BStr; 1]> = inputs.collect();
         // First tokenize each input
         let base_token_ranges: Vec<Range<usize>>;
         let other_token_ranges: Vec<Vec<Range<usize>>>;
@@ -583,7 +586,7 @@ impl<'input> Diff<'input> {
 
     fn with_inputs_and_token_ranges(
         base_input: &'input BStr,
-        other_inputs: Vec<&'input BStr>,
+        other_inputs: SmallVec<[&'input BStr; 1]>,
         base_token_ranges: &[Range<usize>],
         other_token_ranges: &[Vec<Range<usize>>],
         compare: impl CompareBytes,
@@ -600,7 +603,7 @@ impl<'input> Diff<'input> {
             [] => {
                 let whole_range = UnchangedRange {
                     base: 0..base_source.text.len(),
-                    others: vec![],
+                    others: smallvec![],
                 };
                 vec![whole_range]
             }
@@ -611,7 +614,7 @@ impl<'input> Diff<'input> {
                 // Add an empty range at the start to make life easier for hunks().
                 unchanged_regions.push(UnchangedRange {
                     base: 0..0,
-                    others: vec![0..0; other_inputs.len()],
+                    others: smallvec![0..0; other_inputs.len()],
                 });
                 let mut first_positions = Vec::new();
                 collect_unchanged_words(
@@ -795,7 +798,7 @@ impl<'input> Diff<'input> {
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct DiffHunk<'input> {
     pub kind: DiffHunkKind,
-    pub contents: Vec<&'input BStr>,
+    pub contents: DiffHunkContentVec<'input>,
 }
 
 impl<'input> DiffHunk<'input> {
@@ -824,6 +827,9 @@ pub enum DiffHunkKind {
     Different,
 }
 
+// Inline up to two sides
+pub type DiffHunkContentVec<'input> = SmallVec<[&'input BStr; 2]>;
+
 pub struct DiffHunkIterator<'diff, 'input> {
     diff: &'diff Diff<'input>,
     previous: &'diff UnchangedRange,
@@ -850,12 +856,12 @@ impl<'diff, 'input> Iterator for DiffHunkIterator<'diff, 'input> {
     fn next(&mut self) -> Option<Self::Item> {
         if !self.unchanged_emitted {
             self.unchanged_emitted = true;
-            let contents = self.diff.hunk_at(self.previous).collect_vec();
+            let contents = self.diff.hunk_at(self.previous).collect();
             let kind = DiffHunkKind::Matching;
             return Some(DiffHunk { kind, contents });
         }
         let current = self.unchanged_iter.next()?;
-        let contents = self.diff.hunk_between(self.previous, current).collect_vec();
+        let contents: DiffHunkContentVec = self.diff.hunk_between(self.previous, current).collect();
         debug_assert!(
             contents.iter().any(|content| !content.is_empty()),
             "unchanged regions should have been compacted"