Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11751,8 +11751,14 @@
     // TODO: Do have these for flat. Older targets also had them for buffers.
     unsigned AS = RMW->getPointerAddressSpace();
 
+    // When the function carries "amdgpu-unsafe-fp-atomics"="true", the
+    // FP-mode-based CmpXChg fallback below is skipped for global fadd.
+    bool UnsafeFpAtomics =
+        RMW->getFunction()->getFnAttribute("amdgpu-unsafe-fp-atomics")
+            .getValueAsString() == "true";
+
     if (AS == AMDGPUAS::GLOBAL_ADDRESS && Subtarget->hasAtomicFaddInsts()) {
-      if (!fpModeMatchesGlobalFPAtomicMode(RMW))
+      if (!UnsafeFpAtomics && !fpModeMatchesGlobalFPAtomicMode(RMW))
         return AtomicExpansionKind::CmpXChg;
 
       return RMW->use_empty() ? AtomicExpansionKind::None :
Index: llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
+++ llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll
@@ -61,5 +61,24 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_unsafe_fp_atomics:
+; GFX900: v_add_f32_e32
+; GFX900: global_atomic_cmpswap
+; GFX908: global_atomic_add_f32
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_unsafe_fp_atomics(float addrspace(1)* %ptr) #2 {
+  %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
+  ret void
+}
+
+; GCN-LABEL: {{^}}global_atomic_fadd_noret_f32_safe_fp_atomics:
+; GCN: v_add_f32_e32
+; GCN: global_atomic_cmpswap
+define amdgpu_kernel void @global_atomic_fadd_noret_f32_safe_fp_atomics(float addrspace(1)* %ptr) #3 {
+  %result = atomicrmw fadd float addrspace(1)* %ptr, float 4.0 seq_cst
+  ret void
+}
+
 attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign"}
 attributes #1 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "target-cpu"="gfx803" "target-features"="+atomic-fadd-insts" }
+attributes #2 = { "amdgpu-unsafe-fp-atomics"="true" }
+attributes #3 = { "amdgpu-unsafe-fp-atomics"="false" }