mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-21 04:00:29 +00:00
diff: inline contents and ranges vecs up to two sides
This appears to be a bit faster if there are tons of unchanged ranges. ``` group new old ----- --- --- bench_diff_git_git_read_tree_c 1.00 58.5±0.12µs 1.07 62.7±0.60µs bench_diff_lines/modified/10k 1.00 34.2±0.72ms 1.08 37.0±1.09ms bench_diff_lines/modified/1k 1.00 3.1±0.08ms 1.12 3.5±0.01ms bench_diff_lines/reversed/10k 1.00 28.0±0.15ms 1.01 28.4±0.51ms bench_diff_lines/reversed/1k 1.00 616.0±16.20µs 1.00 617.0±9.29µs bench_diff_lines/unchanged/10k 1.00 3.5±0.04ms 1.10 3.9±0.06ms bench_diff_lines/unchanged/1k 1.00 328.4±4.44µs 1.07 352.0±1.41µs ```
This commit is contained in:
parent
92103f4db2
commit
e5e85e781f
2 changed files with 19 additions and 12 deletions
|
@ -45,6 +45,7 @@ use jj_lib::diff::CompareBytesIgnoreAllWhitespace;
|
|||
use jj_lib::diff::CompareBytesIgnoreWhitespaceAmount;
|
||||
use jj_lib::diff::Diff;
|
||||
use jj_lib::diff::DiffHunk;
|
||||
use jj_lib::diff::DiffHunkContentVec;
|
||||
use jj_lib::diff::DiffHunkKind;
|
||||
use jj_lib::files::DiffLineHunkSide;
|
||||
use jj_lib::files::DiffLineIterator;
|
||||
|
@ -582,7 +583,7 @@ fn show_color_words_diff_hunks(
|
|||
/// Prints `num_after` lines, ellipsis, and `num_before` lines.
|
||||
fn show_color_words_context_lines(
|
||||
formatter: &mut dyn Formatter,
|
||||
contexts: &[Vec<&BStr>],
|
||||
contexts: &[DiffHunkContentVec],
|
||||
mut line_number: DiffLineNumber,
|
||||
options: &ColorWordsDiffOptions,
|
||||
num_after: usize,
|
||||
|
@ -1288,7 +1289,7 @@ fn unified_diff_hunks<'content>(
|
|||
// Just use the right (i.e. new) content. We could count the
|
||||
// number of skipped lines separately, but the number of the
|
||||
// context lines should match the displayed content.
|
||||
let [_, right] = hunk.contents.try_into().unwrap();
|
||||
let [_, right] = hunk.contents[..].try_into().unwrap();
|
||||
let mut lines = right.split_inclusive(|b| *b == b'\n').fuse();
|
||||
if !current_hunk.lines.is_empty() {
|
||||
// The previous hunk line should be either removed/added.
|
||||
|
@ -1604,7 +1605,7 @@ fn get_diff_stat(
|
|||
match hunk.kind {
|
||||
DiffHunkKind::Matching => {}
|
||||
DiffHunkKind::Different => {
|
||||
let [left, right] = hunk.contents.try_into().unwrap();
|
||||
let [left, right] = hunk.contents[..].try_into().unwrap();
|
||||
removed += left.split_inclusive(|b| *b == b'\n').count();
|
||||
added += right.split_inclusive(|b| *b == b'\n').count();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,8 @@ use std::slice;
|
|||
use bstr::BStr;
|
||||
use hashbrown::HashTable;
|
||||
use itertools::Itertools;
|
||||
use smallvec::smallvec;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
pub fn find_line_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
||||
text.split_inclusive(|b| *b == b'\n')
|
||||
|
@ -508,8 +510,9 @@ fn intersect_unchanged_words(
|
|||
|
||||
#[derive(Clone, PartialEq, Eq, Debug)]
|
||||
struct UnchangedRange {
|
||||
// Inline up to two sides (base + one other)
|
||||
base: Range<usize>,
|
||||
others: Vec<Range<usize>>,
|
||||
others: SmallVec<[Range<usize>; 1]>,
|
||||
}
|
||||
|
||||
impl UnchangedRange {
|
||||
|
@ -538,7 +541,7 @@ impl UnchangedRange {
|
|||
#[derive(Clone, Debug)]
|
||||
pub struct Diff<'input> {
|
||||
base_input: &'input BStr,
|
||||
other_inputs: Vec<&'input BStr>,
|
||||
other_inputs: SmallVec<[&'input BStr; 1]>,
|
||||
/// Sorted list of ranges of unchanged regions in bytes.
|
||||
///
|
||||
/// The list should never be empty. The first and the last region may be
|
||||
|
@ -554,7 +557,7 @@ impl<'input> Diff<'input> {
|
|||
) -> Self {
|
||||
let mut inputs = inputs.into_iter().map(BStr::new);
|
||||
let base_input = inputs.next().expect("inputs must not be empty");
|
||||
let other_inputs = inputs.collect_vec();
|
||||
let other_inputs: SmallVec<[&BStr; 1]> = inputs.collect();
|
||||
// First tokenize each input
|
||||
let base_token_ranges: Vec<Range<usize>>;
|
||||
let other_token_ranges: Vec<Vec<Range<usize>>>;
|
||||
|
@ -583,7 +586,7 @@ impl<'input> Diff<'input> {
|
|||
|
||||
fn with_inputs_and_token_ranges(
|
||||
base_input: &'input BStr,
|
||||
other_inputs: Vec<&'input BStr>,
|
||||
other_inputs: SmallVec<[&'input BStr; 1]>,
|
||||
base_token_ranges: &[Range<usize>],
|
||||
other_token_ranges: &[Vec<Range<usize>>],
|
||||
compare: impl CompareBytes,
|
||||
|
@ -600,7 +603,7 @@ impl<'input> Diff<'input> {
|
|||
[] => {
|
||||
let whole_range = UnchangedRange {
|
||||
base: 0..base_source.text.len(),
|
||||
others: vec![],
|
||||
others: smallvec![],
|
||||
};
|
||||
vec![whole_range]
|
||||
}
|
||||
|
@ -611,7 +614,7 @@ impl<'input> Diff<'input> {
|
|||
// Add an empty range at the start to make life easier for hunks().
|
||||
unchanged_regions.push(UnchangedRange {
|
||||
base: 0..0,
|
||||
others: vec![0..0; other_inputs.len()],
|
||||
others: smallvec![0..0; other_inputs.len()],
|
||||
});
|
||||
let mut first_positions = Vec::new();
|
||||
collect_unchanged_words(
|
||||
|
@ -795,7 +798,7 @@ impl<'input> Diff<'input> {
|
|||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct DiffHunk<'input> {
|
||||
pub kind: DiffHunkKind,
|
||||
pub contents: Vec<&'input BStr>,
|
||||
pub contents: DiffHunkContentVec<'input>,
|
||||
}
|
||||
|
||||
impl<'input> DiffHunk<'input> {
|
||||
|
@ -824,6 +827,9 @@ pub enum DiffHunkKind {
|
|||
Different,
|
||||
}
|
||||
|
||||
// Inline up to two sides
|
||||
pub type DiffHunkContentVec<'input> = SmallVec<[&'input BStr; 2]>;
|
||||
|
||||
pub struct DiffHunkIterator<'diff, 'input> {
|
||||
diff: &'diff Diff<'input>,
|
||||
previous: &'diff UnchangedRange,
|
||||
|
@ -850,12 +856,12 @@ impl<'diff, 'input> Iterator for DiffHunkIterator<'diff, 'input> {
|
|||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if !self.unchanged_emitted {
|
||||
self.unchanged_emitted = true;
|
||||
let contents = self.diff.hunk_at(self.previous).collect_vec();
|
||||
let contents = self.diff.hunk_at(self.previous).collect();
|
||||
let kind = DiffHunkKind::Matching;
|
||||
return Some(DiffHunk { kind, contents });
|
||||
}
|
||||
let current = self.unchanged_iter.next()?;
|
||||
let contents = self.diff.hunk_between(self.previous, current).collect_vec();
|
||||
let contents: DiffHunkContentVec = self.diff.hunk_between(self.previous, current).collect();
|
||||
debug_assert!(
|
||||
contents.iter().any(|content| !content.is_empty()),
|
||||
"unchanged regions should have been compacted"
|
||||
|
|
Loading…
Reference in a new issue