Apply a score boost when consecutive triplets of characters match

This commit is contained in:
Antonio Scandurra 2023-08-23 19:21:44 +02:00
parent 301a12923f
commit f22acb602e

View file

@ -1,3 +1,4 @@
use collections::HashMap;
use std::{
fmt::{self, Debug},
ops::Range,
@ -74,12 +75,13 @@ pub struct Diff {
scores: Matrix,
old_text_ix: usize,
new_text_ix: usize,
equal_runs: HashMap<(usize, usize), u32>,
}
impl Diff {
const INSERTION_SCORE: isize = -1;
const DELETION_SCORE: isize = -4;
const EQUALITY_SCORE: isize = 15;
const DELETION_SCORE: isize = -5;
const EQUALITY_BASE: isize = 2;
pub fn new(old: String) -> Self {
let old = old.chars().collect::<Vec<_>>();
@ -94,6 +96,7 @@ impl Diff {
scores,
old_text_ix: 0,
new_text_ix: 0,
equal_runs: Default::default(),
}
}
@ -107,36 +110,38 @@ impl Diff {
let insertion_score = self.scores.get(i, j - 1) + Self::INSERTION_SCORE;
let deletion_score = self.scores.get(i - 1, j) + Self::DELETION_SCORE;
let equality_score = if self.old[i - 1] == self.new[j - 1] {
let mut equal_run = self.equal_runs.get(&(i - 1, j - 1)).copied().unwrap_or(0);
equal_run += 1;
self.equal_runs.insert((i, j), equal_run);
if self.old[i - 1] == ' ' {
self.scores.get(i - 1, j - 1)
} else {
self.scores.get(i - 1, j - 1) + Self::EQUALITY_SCORE
self.scores.get(i - 1, j - 1) + Self::EQUALITY_BASE.pow(equal_run / 3)
}
} else {
isize::MIN
};
let score = insertion_score.max(deletion_score).max(equality_score);
self.scores.set(i, j, score);
}
}
let mut max_score = isize::MIN;
let mut best_row = self.old_text_ix;
let mut best_col = self.new_text_ix;
let mut next_old_text_ix = self.old_text_ix;
let next_new_text_ix = self.new.len();
for i in self.old_text_ix..=self.old.len() {
for j in self.new_text_ix..=self.new.len() {
let score = self.scores.get(i, j);
if score > max_score {
max_score = score;
best_row = i;
best_col = j;
}
let score = self.scores.get(i, next_new_text_ix);
if score > max_score {
max_score = score;
next_old_text_ix = i;
}
}
let hunks = self.backtrack(best_row, best_col);
self.old_text_ix = best_row;
self.new_text_ix = best_col;
let hunks = self.backtrack(next_old_text_ix, next_new_text_ix);
self.old_text_ix = next_old_text_ix;
self.new_text_ix = next_new_text_ix;
hunks
}