forked from mirrors/jj
diff: omit construction of count-to-words map for right-side histogram
This also allows us to borrow Vec<WordPosition> from &self.
This commit is contained in:
parent
493f610fd5
commit
5c52b4ec13
1 changed file with 13 additions and 13 deletions
|
@ -103,13 +103,12 @@ impl<'input, 'aux> DiffSource<'input, 'aux> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Histogram<'a> {
|
struct Histogram<'input> {
|
||||||
word_to_positions: HashMap<&'a BStr, Vec<WordPosition>>,
|
word_to_positions: HashMap<&'input BStr, Vec<WordPosition>>,
|
||||||
count_to_words: BTreeMap<usize, Vec<&'a BStr>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Histogram<'_> {
|
impl<'input> Histogram<'input> {
|
||||||
fn calculate<'a>(source: &DiffSource<'a, '_>, max_occurrences: usize) -> Histogram<'a> {
|
fn calculate(source: &DiffSource<'input, '_>, max_occurrences: usize) -> Self {
|
||||||
let mut word_to_positions: HashMap<&BStr, Vec<WordPosition>> = HashMap::new();
|
let mut word_to_positions: HashMap<&BStr, Vec<WordPosition>> = HashMap::new();
|
||||||
for (i, range) in source.ranges.iter().enumerate() {
|
for (i, range) in source.ranges.iter().enumerate() {
|
||||||
let word = &source.text[range.clone()];
|
let word = &source.text[range.clone()];
|
||||||
|
@ -120,14 +119,15 @@ impl Histogram<'_> {
|
||||||
positions.push(WordPosition(i));
|
positions.push(WordPosition(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Histogram { word_to_positions }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_count_to_words(&self) -> BTreeMap<usize, Vec<&'input BStr>> {
|
||||||
let mut count_to_words: BTreeMap<usize, Vec<&BStr>> = BTreeMap::new();
|
let mut count_to_words: BTreeMap<usize, Vec<&BStr>> = BTreeMap::new();
|
||||||
for (word, ranges) in &word_to_positions {
|
for (word, ranges) in &self.word_to_positions {
|
||||||
count_to_words.entry(ranges.len()).or_default().push(word);
|
count_to_words.entry(ranges.len()).or_default().push(word);
|
||||||
}
|
}
|
||||||
Histogram {
|
count_to_words
|
||||||
word_to_positions,
|
|
||||||
count_to_words,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -233,7 +233,8 @@ fn unchanged_ranges_lcs(
|
||||||
) -> Vec<(Range<usize>, Range<usize>)> {
|
) -> Vec<(Range<usize>, Range<usize>)> {
|
||||||
let max_occurrences = 100;
|
let max_occurrences = 100;
|
||||||
let left_histogram = Histogram::calculate(left, max_occurrences);
|
let left_histogram = Histogram::calculate(left, max_occurrences);
|
||||||
if *left_histogram.count_to_words.keys().next().unwrap() > max_occurrences {
|
let left_count_to_words = left_histogram.build_count_to_words();
|
||||||
|
if *left_count_to_words.keys().next().unwrap() > max_occurrences {
|
||||||
// If there are very many occurrences of all words, then we just give up.
|
// If there are very many occurrences of all words, then we just give up.
|
||||||
return vec![];
|
return vec![];
|
||||||
}
|
}
|
||||||
|
@ -241,8 +242,7 @@ fn unchanged_ranges_lcs(
|
||||||
// Look for words with few occurrences in `left` (could equally well have picked
|
// Look for words with few occurrences in `left` (could equally well have picked
|
||||||
// `right`?). If any of them also occur in `right`, then we add the words to
|
// `right`?). If any of them also occur in `right`, then we add the words to
|
||||||
// the LCS.
|
// the LCS.
|
||||||
let Some(uncommon_shared_words) = left_histogram
|
let Some(uncommon_shared_words) = left_count_to_words
|
||||||
.count_to_words
|
|
||||||
.iter()
|
.iter()
|
||||||
.map(|(left_count, left_words)| -> Vec<&BStr> {
|
.map(|(left_count, left_words)| -> Vec<&BStr> {
|
||||||
left_words
|
left_words
|
||||||
|
|
Loading…
Reference in a new issue