ok/jj
1
0
Fork 0
forked from mirrors/jj

lib: optimize common prefix computation of two hex strings

Comparing whole bytes first, and examining the nibbles only at the first
byte that differs, is more efficient than comparing nibble by nibble. A
benchmark comparing the old and new implementations with various common
prefix lengths shows:

```
Common hex len/old/3    time:   [7.5444 ns 7.5807 ns 7.6140 ns]
Common hex len/new/3    time:   [1.2100 ns 1.2144 ns 1.2192 ns]
Common hex len/old/6    time:   [11.849 ns 11.879 ns 11.910 ns]
Common hex len/new/6    time:   [1.9950 ns 2.0046 ns 2.0156 ns]
Common hex len/old/32   time:   [63.030 ns 63.345 ns 63.718 ns]
Common hex len/new/32   time:   [6.4647 ns 6.4800 ns 6.4999 ns]
```
This commit is contained in:
Samuel Tardieu 2024-09-15 16:36:07 +02:00
parent a7f32d3652
commit 56dbbb8fc6

View file

@ -51,24 +51,23 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option<String> {
.collect() .collect()
} }
/// Calculates common prefix length of two bytes. The length to be returned is /// Calculates common prefix length of two byte sequences. The length
/// a number of hexadecimal digits. /// to be returned is a number of hexadecimal digits.
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize { pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
iter_half_bytes(bytes_a) std::iter::zip(bytes_a, bytes_b)
.zip(iter_half_bytes(bytes_b)) .enumerate()
.take_while(|(a, b)| a == b) .find_map(|(i, (a, b))| {
.count() if a != b {
} if a >> 4 != b >> 4 {
Some(i * 2)
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ { } else {
(0..bytes.len() * 2).map(|i| { Some(i * 2 + 1)
let v = bytes[i / 2]; }
if i & 1 == 0 { } else {
v >> 4 None
} else { }
v & 0xf })
} .unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2)
})
} }
#[cfg(test)] #[cfg(test)]