Skip to content

x86-64 LOCK op, ptr, CONST generation is inconsistent and constant-dependent #147280

Open
@orlp

Description

@orlp

Consider these two nearly identical Rust functions:

use std::sync::atomic::*;

/// Atomically ANDs `x` with `!2` (clearing bit 1) using relaxed ordering and
/// returns `true` if the resulting value is nonzero.
///
/// `fetch_and` returns the value held *before* the update, so the same mask is
/// applied again to the returned value to reconstruct what was stored.
// NOTE(review): `#[inline(never)]` is presumably here so the function stays a
// separate symbol in the assembly listing — confirm with the author.
#[inline(never)]
pub fn foo(x: &AtomicU64) -> bool {
    // `old` is the pre-update value returned by the atomic RMW.
    let old = x.fetch_and(!2, Ordering::Relaxed);
    // Re-apply the mask to obtain the post-update value.
    let new = old & !2;
    // The result depends only on whether the post-update value is zero.
    new != 0
}

/// Atomically ANDs `x` with `!1` (clearing bit 0) using relaxed ordering and
/// returns `true` if the resulting value is nonzero.
///
/// `fetch_and` returns the value held *before* the update, so the same mask is
/// applied again to the returned value to reconstruct what was stored.
// NOTE(review): `#[inline(never)]` is presumably here so the function stays a
// separate symbol in the assembly listing — confirm with the author.
#[inline(never)]
pub fn bar(x: &AtomicU64) -> bool {
    // `old` is the pre-update value returned by the atomic RMW.
    let old = x.fetch_and(!1, Ordering::Relaxed);
    // Re-apply the mask to obtain the post-update value.
    let new = old & !1;
    // The result depends only on whether the post-update value is zero.
    new != 0
}

One is optimized well to a LOCK instruction + flag test, the other to a CAS loop:

example::foo::hcccd03d7e323547c:
        lock            and     qword ptr [rdi], -3
        setne   al
        ret

example::bar::hd8685fcc36071aa3:
        mov     rax, qword ptr [rdi]
.LBB1_1:
        mov     rcx, rax
        and     rcx, -2
        lock            cmpxchg qword ptr [rdi], rcx
        jne     .LBB1_1
        cmp     rax, 2
        setae   al
        ret

The generated LLVM IR for foo is:

define noundef zeroext i1 @example::foo::hcccd03d7e323547c(ptr noundef nonnull align 8 %x) unnamed_addr {
start:
  %0 = atomicrmw and ptr %x, i64 -3 monotonic, align 8
  %new = and i64 %0, -3
  %_0 = icmp ne i64 %new, 0
  ret i1 %_0
}

(and similarly for bar, just with the constant adjusted)


In general, I would expect an atomic arithmetic RMW operation followed by a test that can be answered from the zero flag, sign flag, or overflow flag to be compiled on x86-64 to a single LOCK-prefixed instruction followed by a check of the appropriate flag. Whether this actually happens seems to be really inconsistent.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions