diff: make tokenization return slices instead of making copies

2024-12-27 23:06:45 +00:00 · 2021-01-18 00:53:44 -08:00 · 2021-01-18 00:53:44 -08:00 · 7957feca49
commit 7957feca49
parent 2879d817dd
2 changed files with 23 additions and 28 deletions
--- a/lib/src/files.rs
+++ b/lib/src/files.rs
@@ -24,50 +24,45 @@ fn is_same_word(a: u8, b: u8) -> bool {
    (is_word_byte(a) && is_word_byte(b)) || a & 0x80 != 0
 }
-fn tokenize(data: &[u8]) -> Vec<Vec<u8>> {
+fn tokenize(data: &[u8]) -> Vec<&[u8]> {
    // TODO: Fix this code to not be so inefficient, and to allow the word
    // delimiter to be configured.
    let mut output = vec![];
-    let mut current = vec![];
+    let mut word_start_pos = 0;
    let mut maybe_prev: Option<u8> = None;
-    for b in data {
+    for (i, b) in data.iter().enumerate() {
        let b = *b;
-        match maybe_prev {
+        if let Some(prev) = maybe_prev {
-            None => current.push(b),
+            if !is_same_word(prev, b) {
-            Some(prev) => {
+                output.push(&data[word_start_pos..i]);
-                if is_same_word(prev, b) {
+                word_start_pos = i;
-                    current.push(b);
-                } else {
-                    output.push(current);
-                    current = vec![b];
-                }
            }
        }
        maybe_prev = Some(b);
    }
-    if !current.is_empty() {
+    if word_start_pos < data.len() {
-        output.push(current);
+        output.push(&data[word_start_pos..]);
    }
    output
 }
 #[derive(PartialEq, Eq, Clone, Debug)]
-pub enum DiffHunk {
+pub enum DiffHunk<'a> {
-    Unmodified(Vec<u8>),
+    Unmodified(&'a [u8]),
-    Added(Vec<u8>),
+    Added(&'a [u8]),
-    Removed(Vec<u8>),
+    Removed(&'a [u8]),
 }
 #[derive(PartialEq, Eq, Clone, Debug)]
-pub struct DiffLine {
+pub struct DiffLine<'a> {
    pub left_line_number: u32,
    pub right_line_number: u32,
    pub has_left_content: bool,
    pub has_right_content: bool,
-    pub hunks: Vec<DiffHunk>,
+    pub hunks: Vec<DiffHunk<'a>>,
 }
-impl DiffLine {
+impl DiffLine<'_> {
    fn reset_line(&mut self) {
        self.has_left_content = false;
        self.has_right_content = false;
@@ -81,7 +76,7 @@ impl DiffLine {
    }
 }
-pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
+pub fn diff<'a>(left: &'a [u8], right: &'a [u8], callback: &mut impl FnMut(&DiffLine<'a>)) {
    // TODO: Should we attempt to interpret as utf-8 and otherwise break only at
    // newlines?
    let left_tokens = tokenize(left);
@@ -100,7 +95,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
                assert!(left == right);
                diff_line.has_left_content = true;
                diff_line.has_right_content = true;
-                diff_line.hunks.push(DiffHunk::Unmodified(left.clone()));
+                diff_line.hunks.push(DiffHunk::Unmodified(left));
                if left == &[b'\n'] {
                    callback(&diff_line);
                    diff_line.left_line_number += 1;
@@ -110,7 +105,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
            }
            diff::Result::Left(left) => {
                diff_line.has_left_content = true;
-                diff_line.hunks.push(DiffHunk::Removed(left.clone()));
+                diff_line.hunks.push(DiffHunk::Removed(left));
                if left == &[b'\n'] {
                    callback(&diff_line);
                    diff_line.left_line_number += 1;
@@ -119,7 +114,7 @@ pub fn diff(left: &[u8], right: &[u8], callback: &mut impl FnMut(&DiffLine)) {
            }
            diff::Result::Right(right) => {
                diff_line.has_right_content = true;
-                diff_line.hunks.push(DiffHunk::Added(right.clone()));
+                diff_line.hunks.push(DiffHunk::Added(right));
                if right == &[b'\n'] {
                    callback(&diff_line);
                    diff_line.right_line_number += 1;
--- a/src/commands.rs
+++ b/src/commands.rs
@@ -715,16 +715,16 @@ fn print_diff_line(styler: &mut dyn Styler, diff_line: &DiffLine) {
    for hunk in &diff_line.hunks {
        match hunk {
            files::DiffHunk::Unmodified(data) => {
-                styler.write_bytes(data.as_slice());
+                styler.write_bytes(data);
            }
            files::DiffHunk::Removed(data) => {
                styler.add_label(String::from("left"));
-                styler.write_bytes(data.as_slice());
+                styler.write_bytes(data);
                styler.remove_label();
            }
            files::DiffHunk::Added(data) => {
                styler.add_label(String::from("right"));
-                styler.write_bytes(data.as_slice());
+                styler.write_bytes(data);
                styler.remove_label();
            }
        }