diff --git a/src/lib.rs b/src/lib.rs index 03117c9..0e83e2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -197,11 +197,26 @@ const PREVENT_TRIVIAL_ZERO_COLLAPSE: u64 = 0xa4093822299f31d0; #[inline] fn multiply_mix(x: u64, y: u64) -> u64 { - #[cfg(target_pointer_width = "64")] - { + // The following code path is only fast if 64-bit to 128-bit widening + // multiplication is supported by the architecture. Most 64-bit + // architectures except SPARC64 and Wasm64 support it. However, the target + // pointer width doesn't always indicate that we are dealing with a 64-bit + // architecture, as there are ABIs that reduce the pointer width, especially + // on AArch64 and x86-64. WebAssembly (regardless of pointer width) supports + // 64-bit to 128-bit widening multiplication with the `wide-arithmetic` + // proposal. + if cfg!(any( + all( + target_pointer_width = "64", + not(any(target_arch = "sparc64", target_arch = "wasm64")), + ), + target_arch = "aarch64", + target_arch = "x86_64", + all(target_family = "wasm", target_feature = "wide-arithmetic"), + )) { // We compute the full u64 x u64 -> u128 product, this is a single mul // instruction on x86-64, one mul plus one mulhi on ARM64. - let full = (x as u128) * (y as u128); + let full = (x as u128).wrapping_mul(y as u128); let lo = full as u64; let hi = (full >> 64) as u64; @@ -216,10 +231,7 @@ fn multiply_mix(x: u64, y: u64) -> u64 { // x * y = 2^64 * hi + lo = (-1) * hi + lo = lo - hi, (mod 2^64 + 1) // x * y = 2^64 * hi + lo = 1 * hi + lo = lo + hi, (mod 2^64 - 1) // Multiplicative hashing is universal in a field (like mod p). - } - - #[cfg(target_pointer_width = "32")] - { + } else { // u64 x u64 -> u128 product is prohibitively expensive on 32-bit. // Decompose into 32-bit parts. let lx = x as u32; @@ -228,8 +240,8 @@ fn multiply_mix(x: u64, y: u64) -> u64 { let hy = (y >> 32) as u32; // u32 x u32 -> u64 the low bits of one with the high bits of the other. - let afull = (lx as u64) * (hy as u64); - let bfull = (hx as u64) * (ly as u64); + let afull = (lx as u64).wrapping_mul(hy as u64); + let bfull = (hx as u64).wrapping_mul(ly as u64); // Combine, swapping low/high of one of them so the upper bits of the // product of one combine with the lower bits of the other.