From 7957feca49f30b61892c60035c76646d41966751 Mon Sep 17 00:00:00 2001 From: Martin von Zweigbergk Date: Mon, 18 Jan 2021 00:53:44 -0800 Subject: [PATCH] diff: make tokenization return slices instead of making copies --- lib/src/files.rs | 45 ++++++++++++++++++++------------------------- src/commands.rs | 6 +++--- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/lib/src/files.rs b/lib/src/files.rs index f821851b5..60bea1426 100644 --- a/lib/src/files.rs +++ b/lib/src/files.rs @@ -24,50 +24,45 @@ fn is_same_word(a: u8, b: u8) -> bool { (is_word_byte(a) && is_word_byte(b)) || a & 0x80 != 0 } -fn tokenize(data: &[u8]) -> Vec<Vec<u8>> { +fn tokenize(data: &[u8]) -> Vec<&[u8]> { // TODO: Fix this code to not be so inefficient, and to allow the word // delimiter to be configured. let mut output = vec![]; - let mut current = vec![]; + let mut word_start_pos = 0; let mut maybe_prev: Option<u8> = None; - for b in data { + for (i, b) in data.iter().enumerate() { let b = *b; - match maybe_prev { - None => current.push(b), - Some(prev) => { - if is_same_word(prev, b) { - current.push(b); - } else { - output.push(current); - current = vec![b]; - } + if let Some(prev) = maybe_prev { + if !is_same_word(prev, b) { + output.push(&data[word_start_pos..i]); + word_start_pos = i; } } maybe_prev = Some(b); } - if !current.is_empty() { - output.push(current); + if word_start_pos < data.len() { + output.push(&data[word_start_pos..]); } output } #[derive(PartialEq, Eq, Clone, Debug)] -pub enum DiffHunk { - Unmodified(Vec<u8>), - Added(Vec<u8>), - Removed(Vec<u8>), +pub enum DiffHunk<'a> { + Unmodified(&'a [u8]), + Added(&'a [u8]), + Removed(&'a [u8]), } #[derive(PartialEq, Eq, Clone, Debug)] -pub struct DiffLine { +pub struct DiffLine<'a> { pub left_line_number: u32, pub right_line_number: u32, pub has_left_content: bool, pub has_right_content: bool, - pub hunks: Vec<DiffHunk>, + pub hunks: Vec<DiffHunk<'a>>, } -impl DiffLine { +impl DiffLine<'_> { fn reset_line(&mut self) { self.has_left_content = false; 
self.has_right_content = false; @@ -81,7 +76,7 @@ impl DiffLine { } } -pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) { +pub fn diff<'a>(left: &'a [u8], right: &'a [u8], callback: &mut impl FnMut(&DiffLine<'a>)) { // TODO: Should we attempt to interpret as utf-8 and otherwise break only at // newlines? let left_tokens = tokenize(left); @@ -100,7 +95,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) { assert!(left == right); diff_line.has_left_content = true; diff_line.has_right_content = true; - diff_line.hunks.push(DiffHunk::Unmodified(left.clone())); + diff_line.hunks.push(DiffHunk::Unmodified(left)); if left == &[b'\n'] { callback(&diff_line); diff_line.left_line_number += 1; @@ -110,7 +105,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) { } diff::Result::Left(left) => { diff_line.has_left_content = true; - diff_line.hunks.push(DiffHunk::Removed(left.clone())); + diff_line.hunks.push(DiffHunk::Removed(left)); if left == &[b'\n'] { callback(&diff_line); diff_line.left_line_number += 1; @@ -119,7 +114,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) { } diff::Result::Right(right) => { diff_line.has_right_content = true; - diff_line.hunks.push(DiffHunk::Added(right.clone())); + diff_line.hunks.push(DiffHunk::Added(right)); if right == &[b'\n'] { callback(&diff_line); diff_line.right_line_number += 1; diff --git a/src/commands.rs b/src/commands.rs index 2fbf00a84..1af68aa1b 100644 --- a/src/commands.rs +++ b/src/commands.rs @@ -715,16 +715,16 @@ fn print_diff_line(styler: &mut dyn Styler, diff_line: &DiffLine) { for hunk in &diff_line.hunks { match hunk { files::DiffHunk::Unmodified(data) => { - styler.write_bytes(data.as_slice()); + styler.write_bytes(data); } files::DiffHunk::Removed(data) => { styler.add_label(String::from("left")); - styler.write_bytes(data.as_slice()); + styler.write_bytes(data); styler.remove_label(); } 
files::DiffHunk::Added(data) => { styler.add_label(String::from("right")); - styler.write_bytes(data.as_slice()); + styler.write_bytes(data); styler.remove_label(); } }