diff: make tokenization return slices instead of making copies

This commit is contained in:
Martin von Zweigbergk 2021-01-18 00:53:44 -08:00
parent 2879d817dd
commit 7957feca49
2 changed files with 23 additions and 28 deletions

View file

@@ -24,50 +24,45 @@ fn is_same_word(a: u8, b: u8) -> bool {
(is_word_byte(a) && is_word_byte(b)) || a & 0x80 != 0 (is_word_byte(a) && is_word_byte(b)) || a & 0x80 != 0
} }
fn tokenize(data: &[u8]) -> Vec<Vec<u8>> { fn tokenize(data: &[u8]) -> Vec<&[u8]> {
// TODO: Fix this code to not be so inefficient, and to allow the word // TODO: Fix this code to not be so inefficient, and to allow the word
// delimiter to be configured. // delimiter to be configured.
let mut output = vec![]; let mut output = vec![];
let mut current = vec![]; let mut word_start_pos = 0;
let mut maybe_prev: Option<u8> = None; let mut maybe_prev: Option<u8> = None;
for b in data { for (i, b) in data.iter().enumerate() {
let b = *b; let b = *b;
match maybe_prev { if let Some(prev) = maybe_prev {
None => current.push(b), if !is_same_word(prev, b) {
Some(prev) => { output.push(&data[word_start_pos..i]);
if is_same_word(prev, b) { word_start_pos = i;
current.push(b);
} else {
output.push(current);
current = vec![b];
}
} }
} }
maybe_prev = Some(b); maybe_prev = Some(b);
} }
if !current.is_empty() { if word_start_pos < data.len() {
output.push(current); output.push(&data[word_start_pos..]);
} }
output output
} }
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
pub enum DiffHunk { pub enum DiffHunk<'a> {
Unmodified(Vec<u8>), Unmodified(&'a [u8]),
Added(Vec<u8>), Added(&'a [u8]),
Removed(Vec<u8>), Removed(&'a [u8]),
} }
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
pub struct DiffLine { pub struct DiffLine<'a> {
pub left_line_number: u32, pub left_line_number: u32,
pub right_line_number: u32, pub right_line_number: u32,
pub has_left_content: bool, pub has_left_content: bool,
pub has_right_content: bool, pub has_right_content: bool,
pub hunks: Vec<DiffHunk>, pub hunks: Vec<DiffHunk<'a>>,
} }
impl DiffLine { impl DiffLine<'_> {
fn reset_line(&mut self) { fn reset_line(&mut self) {
self.has_left_content = false; self.has_left_content = false;
self.has_right_content = false; self.has_right_content = false;
@@ -81,7 +76,7 @@ impl DiffLine {
} }
} }
pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) { pub fn diff<'a>(left: &'a [u8], right: &'a [u8], callback: &mut impl FnMut(&DiffLine<'a>)) {
// TODO: Should we attempt to interpret as utf-8 and otherwise break only at // TODO: Should we attempt to interpret as utf-8 and otherwise break only at
// newlines? // newlines?
let left_tokens = tokenize(left); let left_tokens = tokenize(left);
@@ -100,7 +95,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
assert!(left == right); assert!(left == right);
diff_line.has_left_content = true; diff_line.has_left_content = true;
diff_line.has_right_content = true; diff_line.has_right_content = true;
diff_line.hunks.push(DiffHunk::Unmodified(left.clone())); diff_line.hunks.push(DiffHunk::Unmodified(left));
if left == &[b'\n'] { if left == &[b'\n'] {
callback(&diff_line); callback(&diff_line);
diff_line.left_line_number += 1; diff_line.left_line_number += 1;
@@ -110,7 +105,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
} }
diff::Result::Left(left) => { diff::Result::Left(left) => {
diff_line.has_left_content = true; diff_line.has_left_content = true;
diff_line.hunks.push(DiffHunk::Removed(left.clone())); diff_line.hunks.push(DiffHunk::Removed(left));
if left == &[b'\n'] { if left == &[b'\n'] {
callback(&diff_line); callback(&diff_line);
diff_line.left_line_number += 1; diff_line.left_line_number += 1;
@@ -119,7 +114,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
} }
diff::Result::Right(right) => { diff::Result::Right(right) => {
diff_line.has_right_content = true; diff_line.has_right_content = true;
diff_line.hunks.push(DiffHunk::Added(right.clone())); diff_line.hunks.push(DiffHunk::Added(right));
if right == &[b'\n'] { if right == &[b'\n'] {
callback(&diff_line); callback(&diff_line);
diff_line.right_line_number += 1; diff_line.right_line_number += 1;

View file

@@ -715,16 +715,16 @@ fn print_diff_line(styler: &mut dyn Styler, diff_line: &DiffLine) {
for hunk in &diff_line.hunks { for hunk in &diff_line.hunks {
match hunk { match hunk {
files::DiffHunk::Unmodified(data) => { files::DiffHunk::Unmodified(data) => {
styler.write_bytes(data.as_slice()); styler.write_bytes(data);
} }
files::DiffHunk::Removed(data) => { files::DiffHunk::Removed(data) => {
styler.add_label(String::from("left")); styler.add_label(String::from("left"));
styler.write_bytes(data.as_slice()); styler.write_bytes(data);
styler.remove_label(); styler.remove_label();
} }
files::DiffHunk::Added(data) => { files::DiffHunk::Added(data) => {
styler.add_label(String::from("right")); styler.add_label(String::from("right"));
styler.write_bytes(data.as_slice()); styler.write_bytes(data);
styler.remove_label(); styler.remove_label();
} }
} }