forked from mirrors/jj
lib: optimize common prefix computation of two hex strings
Comparing whole bytes first, and only looking at the nibbles of the first mismatching byte, is more efficient than comparing every nibble. A benchmark comparing the old and new implementations with various common prefix lengths shows:

```
Common hex len/old/3    time: [7.5444 ns 7.5807 ns 7.6140 ns]
Common hex len/new/3    time: [1.2100 ns 1.2144 ns 1.2192 ns]
Common hex len/old/6    time: [11.849 ns 11.879 ns 11.910 ns]
Common hex len/new/6    time: [1.9950 ns 2.0046 ns 2.0156 ns]
Common hex len/old/32   time: [63.030 ns 63.345 ns 63.718 ns]
Common hex len/new/32   time: [6.4647 ns 6.4800 ns 6.4999 ns]
```
This commit is contained in:
parent
a7f32d3652
commit
56dbbb8fc6
1 changed files with 16 additions and 17 deletions
|
@ -51,24 +51,23 @@ pub fn to_reverse_hex(forward_hex: &str) -> Option<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
/// Calculates common prefix length of two bytes. The length to be returned is
|
||||
/// a number of hexadecimal digits.
|
||||
/// Calculates common prefix length of two byte sequences. The length
|
||||
/// to be returned is a number of hexadecimal digits.
|
||||
pub fn common_hex_len(bytes_a: &[u8], bytes_b: &[u8]) -> usize {
|
||||
iter_half_bytes(bytes_a)
|
||||
.zip(iter_half_bytes(bytes_b))
|
||||
.take_while(|(a, b)| a == b)
|
||||
.count()
|
||||
}
|
||||
|
||||
fn iter_half_bytes(bytes: &[u8]) -> impl ExactSizeIterator<Item = u8> + '_ {
|
||||
(0..bytes.len() * 2).map(|i| {
|
||||
let v = bytes[i / 2];
|
||||
if i & 1 == 0 {
|
||||
v >> 4
|
||||
} else {
|
||||
v & 0xf
|
||||
}
|
||||
})
|
||||
std::iter::zip(bytes_a, bytes_b)
|
||||
.enumerate()
|
||||
.find_map(|(i, (a, b))| {
|
||||
if a != b {
|
||||
if a >> 4 != b >> 4 {
|
||||
Some(i * 2)
|
||||
} else {
|
||||
Some(i * 2 + 1)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|| bytes_a.len().min(bytes_b.len()) * 2)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Reference in a new issue