forked from mirrors/jj
diff: do final refinement at byte-level for non-word bytes
This results in significantly more readable diffs on commits like 659393bec2 in this repo.
Before:
test bench_diff_10k_lines_reversed ... bench: 38,122,998 ns/iter (+/- 557,688)
test bench_diff_10k_modified_lines ... bench: 32,556,563 ns/iter (+/- 548,114)
test bench_diff_10k_unchanged_lines ... bench: 4,231 ns/iter (+/- 15)
test bench_diff_1k_lines_reversed ... bench: 958,296 ns/iter (+/- 46,963)
test bench_diff_1k_modified_lines ... bench: 3,014,723 ns/iter (+/- 15,830)
test bench_diff_1k_unchanged_lines ... bench: 249 ns/iter (+/- 2)
test bench_diff_git_git_read_tree_c ... bench: 78,599 ns/iter (+/- 1,079)
After:
test bench_diff_10k_lines_reversed ... bench: 38,289,493 ns/iter (+/- 413,712)
test bench_diff_10k_modified_lines ... bench: 37,352,516 ns/iter (+/- 1,293,950)
test bench_diff_10k_unchanged_lines ... bench: 4,238 ns/iter (+/- 13)
test bench_diff_1k_lines_reversed ... bench: 967,253 ns/iter (+/- 8,506)
test bench_diff_1k_modified_lines ... bench: 3,358,028 ns/iter (+/- 37,154)
test bench_diff_1k_unchanged_lines ... bench: 233 ns/iter (+/- 1)
test bench_diff_git_git_read_tree_c ... bench: 95,787 ns/iter (+/- 740)
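So the slowdown is concentrated in the benchmarks with modified lines: roughly 15% for 10k modified lines, 11% for 1k modified lines, and 22% for the git read-tree.c benchmark. The reversed-lines and unchanged-lines benchmarks are essentially unchanged.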
This commit is contained in:
parent
f634ff0e3f
commit
0dd000d236
1 changed file with 10 additions and 7 deletions
|
@ -37,18 +37,21 @@ pub fn find_line_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
|||
ranges
|
||||
}
|
||||
|
||||
/// Returns `true` if `b` is treated as part of a word: an ASCII letter
/// (`A`-`Z`, `a`-`z`), an ASCII digit (`0`-`9`), or an underscore.
/// Every other byte value yields `false`.
fn is_word_byte(b: u8) -> bool {
|
||||
// TODO: Make this configurable (probably higher up in the call stack)
|
||||
matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_')
|
||||
}
|
||||
|
||||
pub fn find_word_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
||||
let mut word_ranges = vec![];
|
||||
let mut word_start_pos = 0;
|
||||
let mut in_word = false;
|
||||
for (i, b) in text.iter().enumerate() {
|
||||
// TODO: Make this configurable (probably higher up in the call stack)
|
||||
let is_word_byte = matches!(*b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'_');
|
||||
if in_word && !is_word_byte {
|
||||
if in_word && !is_word_byte(*b) {
|
||||
in_word = false;
|
||||
word_ranges.push(word_start_pos..i);
|
||||
word_start_pos = i;
|
||||
} else if !in_word && is_word_byte {
|
||||
} else if !in_word && is_word_byte(*b) {
|
||||
in_word = true;
|
||||
word_start_pos = i;
|
||||
}
|
||||
|
@ -59,10 +62,10 @@ pub fn find_word_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
|||
word_ranges
|
||||
}
|
||||
|
||||
pub fn find_newline_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
||||
pub fn find_nonword_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
||||
let mut ranges = vec![];
|
||||
for (i, b) in text.iter().enumerate() {
|
||||
if *b == b'\n' {
|
||||
if !is_word_byte(*b) {
|
||||
ranges.push(i..i + 1);
|
||||
}
|
||||
}
|
||||
|
@ -472,7 +475,7 @@ pub fn diff<'a>(left: &'a [u8], right: &'a [u8]) -> Vec<SliceDiff<'a>> {
|
|||
let range_diffs = vec![RangeDiff::Replaced(0..left.len(), 0..right.len())];
|
||||
let range_diffs = refine_changed_ranges(left, right, &range_diffs, &find_line_ranges);
|
||||
let range_diffs = refine_changed_ranges(left, right, &range_diffs, &find_word_ranges);
|
||||
let range_diffs = refine_changed_ranges(left, right, &range_diffs, &find_newline_ranges);
|
||||
let range_diffs = refine_changed_ranges(left, right, &range_diffs, &find_nonword_ranges);
|
||||
range_diffs_to_slice_diffs(left, right, &range_diffs)
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue