Open
Description
Consider the following Rust code for determining whether any of the 8 bytes in a u64 is zero, taken from the Rust standard library's implementation of memchr:
// 0x01 repeated in each of the 8 bytes of the word.
const LO: u64 = 0x01_01_01_01_01_01_01_01;
// 0x80 repeated in each of the 8 bytes, i.e. only the top bit of every byte.
const HI: u64 = 0x80_80_80_80_80_80_80_80;
/// Returns `true` if at least one of the 8 bytes of `x` is zero.
///
/// Subtracts 1 from every byte at once, clears the bits that were already set
/// in `x`, and keeps only the top bit of each byte. The result is non-zero
/// exactly when some byte of `x` was zero.
const fn contains_zero_byte(x: u64) -> bool {
// In Rust `&` binds tighter than `!=`, so this parses as
// `(x.wrapping_sub(LO) & !x & HI) != 0`. `wrapping_sub` is used so the
// subtraction wraps instead of panicking on overflow.
x.wrapping_sub(LO) & !x & HI != 0
}
The equivalent C++ code is:
https://godbolt.org/z/Ej8TT8v84
// NOTE(review): `u64` is not defined in this snippet; presumably an alias for
// an unsigned 64-bit integer type (the listings show `unsigned long`) — confirm
// against the linked Compiler Explorer source.
// 0x01 repeated in each of the 8 bytes of the word.
constexpr u64 LO = 0x01'01'01'01'01'01'01'01;
// 0x80 repeated in each of the 8 bytes, i.e. only the top bit of every byte.
constexpr u64 HI = 0x80'80'80'80'80'80'80'80;
// Returns true if at least one of the 8 bytes of `x` is zero.
bool contains_zero_byte(u64 x) {
// The outer parentheses are required: in C++ `!=` binds tighter than `&`.
return ((x - LO) & ~x & HI) != 0;
}
For this function, GCC generates
;; AArch64:
contains_zero_byte(unsigned long):
mov x1, -72340172838076674
movk x1, 0xfeff, lsl 0
add x1, x0, x1
bic x1, x1, x0
tst x1, -9187201950435737472
cset w0, ne
ret
;; x86_64:
contains_zero_byte(unsigned long):
movabs rax, -72340172838076673
add rax, rdi
andn rdi, rdi, rax
movabs rax, -9187201950435737472
test rdi, rax
setne al
ret
but LLVM generates:
;; AArch64:
contains_zero_byte(unsigned long):
mov x8, #72340172838076673
mov x9, #-9187201950435737472
movk x8, #256
sub x8, x8, x0
orr x8, x8, x0
bics xzr, x9, x8
cset w0, ne
ret
;; x86_64:
contains_zero_byte(unsigned long):
movabs rax, 72340172838076672
sub rax, rdi
or rax, rdi
movabs rcx, -9187201950435737472
andn rax, rax, rcx
setne al
ret
If we rewrite the LLVM IR to match GCC's output, we get the desired assembly (llc output, alive proof):
; @src: the sub/or form, matching the sub/or/andn sequence in the LLVM
; assembly listings.
; Returns true unless ((0x0101010101010100 - x) | x) has the top bit of every
; byte set.
define dso_local noundef i1 @src(i64 noundef %0) local_unnamed_addr #0 {
; %2 = 0x0101010101010100 - x
%2 = sub i64 u0x0101010101010100, %0
; %3 = %2 | x
%3 = or i64 %2, %0
; Keep only the top bit of each byte.
%4 = and i64 %3, u0x8080808080808080
; True if at least one of those eight top bits is clear.
%5 = icmp ne i64 %4, u0x8080808080808080
ret i1 %5
}
; @tgt: the add/and-not form, matching the add/bic (AArch64) and add/andn
; (x86_64) sequences in the GCC assembly listings.
; Returns true if ((x - 0x0101010101010101) & ~x) has the top bit of any byte
; set.
define dso_local noundef i1 @tgt(i64 noundef %0) local_unnamed_addr #0 {
; %not_0 = ~x
%not_0 = xor i64 %0, -1
; Adding 0xFEFEFEFEFEFEFEFF is the same as subtracting 0x0101010101010101
; modulo 2^64.
%2 = add i64 %0, u0xFEFEFEFEFEFEFEFF
; Discard bits that were already set in x.
%3 = and i64 %2, %not_0
; Keep only the top bit of each byte.
%4 = and i64 %3, u0x8080808080808080
; True if any of those eight top bits is set.
%5 = icmp ne i64 %4, 0
ret i1 %5
}
Rewriting the C++ function does not produce the desired assembly, so I assume the problem is that InstCombine is canonicalising the expression to a less optimal form.