mirror of
https://github.com/martinvonz/jj.git
synced 2025-01-28 15:26:25 +00:00
diff: introduce newtype that represents word-range index
There are usize text indices/ranges and word-range indices. Let's make them somewhat distinct.
This commit is contained in:
parent
739a5d8617
commit
dd93e8f60b
1 changed file with 20 additions and 15 deletions
|
@ -73,6 +73,10 @@ pub fn find_nonword_ranges(text: &[u8]) -> Vec<Range<usize>> {
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Index in a list of word (or token) ranges.
|
||||||
|
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
|
||||||
|
struct WordPosition(usize);
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct DiffSource<'input, 'aux> {
|
struct DiffSource<'input, 'aux> {
|
||||||
text: &'input BStr,
|
text: &'input BStr,
|
||||||
|
@ -87,29 +91,33 @@ impl<'input, 'aux> DiffSource<'input, 'aux> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn narrowed(&self, positions: Range<usize>) -> Self {
|
fn narrowed(&self, positions: Range<WordPosition>) -> Self {
|
||||||
DiffSource {
|
DiffSource {
|
||||||
text: self.text,
|
text: self.text,
|
||||||
ranges: &self.ranges[positions],
|
ranges: &self.ranges[positions.start.0..positions.end.0],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn range_at(&self, position: WordPosition) -> Range<usize> {
|
||||||
|
self.ranges[position.0].clone()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Histogram<'a> {
|
struct Histogram<'a> {
|
||||||
word_to_positions: HashMap<&'a BStr, Vec<usize>>,
|
word_to_positions: HashMap<&'a BStr, Vec<WordPosition>>,
|
||||||
count_to_words: BTreeMap<usize, Vec<&'a BStr>>,
|
count_to_words: BTreeMap<usize, Vec<&'a BStr>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Histogram<'_> {
|
impl Histogram<'_> {
|
||||||
fn calculate<'a>(source: &DiffSource<'a, '_>, max_occurrences: usize) -> Histogram<'a> {
|
fn calculate<'a>(source: &DiffSource<'a, '_>, max_occurrences: usize) -> Histogram<'a> {
|
||||||
let mut word_to_positions: HashMap<&BStr, Vec<usize>> = HashMap::new();
|
let mut word_to_positions: HashMap<&BStr, Vec<WordPosition>> = HashMap::new();
|
||||||
for (i, range) in source.ranges.iter().enumerate() {
|
for (i, range) in source.ranges.iter().enumerate() {
|
||||||
let word = &source.text[range.clone()];
|
let word = &source.text[range.clone()];
|
||||||
let positions = word_to_positions.entry(word).or_default();
|
let positions = word_to_positions.entry(word).or_default();
|
||||||
// Allow one more than max_occurrences, so we can later skip those with more
|
// Allow one more than max_occurrences, so we can later skip those with more
|
||||||
// than max_occurrences
|
// than max_occurrences
|
||||||
if positions.len() <= max_occurrences {
|
if positions.len() <= max_occurrences {
|
||||||
positions.push(i);
|
positions.push(WordPosition(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut count_to_words: BTreeMap<usize, Vec<&BStr>> = BTreeMap::new();
|
let mut count_to_words: BTreeMap<usize, Vec<&BStr>> = BTreeMap::new();
|
||||||
|
@ -284,8 +292,8 @@ fn unchanged_ranges_lcs(
|
||||||
// Produce output ranges, recursing into the modified areas between the elements
|
// Produce output ranges, recursing into the modified areas between the elements
|
||||||
// in the LCS.
|
// in the LCS.
|
||||||
let mut result = vec![];
|
let mut result = vec![];
|
||||||
let mut previous_left_position = 0;
|
let mut previous_left_position = WordPosition(0);
|
||||||
let mut previous_right_position = 0;
|
let mut previous_right_position = WordPosition(0);
|
||||||
for (left_index, right_index) in lcs {
|
for (left_index, right_index) in lcs {
|
||||||
let left_position = left_positions[left_index].0;
|
let left_position = left_positions[left_index].0;
|
||||||
let right_position = right_positions[right_index].0;
|
let right_position = right_positions[right_index].0;
|
||||||
|
@ -299,16 +307,13 @@ fn unchanged_ranges_lcs(
|
||||||
result.push(unchanged_nested_range);
|
result.push(unchanged_nested_range);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
result.push((
|
result.push((left.range_at(left_position), right.range_at(right_position)));
|
||||||
left.ranges[left_position].clone(),
|
previous_left_position = WordPosition(left_position.0 + 1);
|
||||||
right.ranges[right_position].clone(),
|
previous_right_position = WordPosition(right_position.0 + 1);
|
||||||
));
|
|
||||||
previous_left_position = left_position + 1;
|
|
||||||
previous_right_position = right_position + 1;
|
|
||||||
}
|
}
|
||||||
// Also recurse into range at end (after common ranges).
|
// Also recurse into range at end (after common ranges).
|
||||||
let skipped_left_positions = previous_left_position..left.ranges.len();
|
let skipped_left_positions = previous_left_position..WordPosition(left.ranges.len());
|
||||||
let skipped_right_positions = previous_right_position..right.ranges.len();
|
let skipped_right_positions = previous_right_position..WordPosition(right.ranges.len());
|
||||||
if !skipped_left_positions.is_empty() || !skipped_right_positions.is_empty() {
|
if !skipped_left_positions.is_empty() || !skipped_right_positions.is_empty() {
|
||||||
for unchanged_nested_range in unchanged_ranges(
|
for unchanged_nested_range in unchanged_ranges(
|
||||||
&left.narrowed(skipped_left_positions),
|
&left.narrowed(skipped_left_positions),
|
||||||
|
|
Loading…
Reference in a new issue