ok/jj
1
0
Fork 0
forked from mirrors/jj

lib: optimize common prefix computation of two hex strings

Comparing whole bytes first, and examining the nibbles only at the first
differing byte, is more efficient than comparing nibble by nibble. A
benchmark of the old and new implementations with various common prefix
lengths shows:

```
Common hex len/old/3    time:   [7.5444 ns 7.5807 ns 7.6140 ns]
Common hex len/new/3    time:   [1.2100 ns 1.2144 ns 1.2192 ns]
Common hex len/old/6    time:   [11.849 ns 11.879 ns 11.910 ns]
Common hex len/new/6    time:   [1.9950 ns 2.0046 ns 2.0156 ns]
Common hex len/old/32   time:   [63.030 ns 63.345 ns 63.718 ns]
Common hex len/new/32   time:   [6.4647 ns 6.4800 ns 6.4999 ns]
```
This commit is contained in:
Samuel Tardieu 2024-09-15 16:36:07 +02:00
parent a7f32d3652
commit 56dbbb8fc6

View file

@ -51,24 +51,23 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option<String> {
.collect()
}
/// Calculates the common prefix length of two byte sequences.
///
/// The returned length is a number of hexadecimal digits (nibbles), i.e. two
/// per fully matching byte, plus one if the first differing bytes still share
/// their high nibble. If one sequence is a prefix of the other (or both are
/// equal), the result is twice the length of the shorter sequence.
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
    std::iter::zip(bytes_a, bytes_b)
        .enumerate()
        // Compare whole bytes first; nibbles are only inspected at the first
        // mismatching byte.
        .find_map(|(i, (a, b))| {
            if a == b {
                None
            } else if a >> 4 != b >> 4 {
                // High nibbles differ: only the preceding bytes are shared.
                Some(i * 2)
            } else {
                // High nibbles agree, low nibbles differ: one extra digit.
                Some(i * 2 + 1)
            }
        })
        // No mismatch within the overlapping part: the whole shorter input
        // is a common prefix.
        .unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2)
}
#[cfg(test)]