diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11799,6 +11799,29 @@
   case ISD::INTRINSIC_W_CHAIN:
     return AMDGPU::isIntrinsicSourceOfDivergence(
         cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_CLR:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_LOAD_FADD:
+  case ISD::ATOMIC_LOAD_FSUB:
+    // TODO: We may need to check the operand values to determine divergence.
+    // Some atomic ops, such as CLR, are always divergent. For other atomic
+    // ops the divergence is value-dependent. For instance, for
+    // ADD/SUB/XOR/FADD/FSUB, "atomic p, v" is uniform if p is uniform and
+    // v is zero, but it is divergent for any non-zero v, regardless of the
+    // divergence of p or v. The remaining atomic ops depend only on the
+    // divergence of their operands; for example, "atomic p, v" is uniform
+    // if both p and v are uniform.
+    return true;
   }
   return false;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx906 -o - %s | FileCheck %s
+
+%S = type <{ float, double }>
+
+; The result of the atomic op should not be used as a uniform value.
+define protected amdgpu_kernel void @foo(i32 addrspace(1)* %p, %S addrspace(1)* %q) {
+; CHECK-LABEL: foo:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_mov_b32_e32 v3, 1
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_atomic_add v2, v2, v3, s[0:1] glc
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    v_mov_b32_e32 v1, s3
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v2, 12, v[0:1]
+; CHECK-NEXT:    v_mov_b32_e32 v2, 1.0
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_endpgm
+  %n32 = atomicrmw add i32 addrspace(1)* %p, i32 1 monotonic
+  %n64 = zext i32 %n32 to i64
+  %p1 = getelementptr inbounds %S, %S addrspace(1)* %q, i64 %n64, i32 0
+  store float 1.0, float addrspace(1)* %p1
+  ret void
+}
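
For illustration only (not part of the patch), a minimal IR sketch of the value-dependent case described in the TODO comment above; the function name @zero_vs_nonzero and the kernel arguments %p and %out are hypothetical, and the patch as written conservatively reports both results as divergent:

; Both atomics use a uniform pointer. With a zero operand every lane reads
; back the same original value, so %same could in principle be treated as
; uniform. With a non-zero operand the lanes perform the read-modify-write
; sequentially and each lane observes a different intermediate value, so
; %differs is divergent even though %p and the constant 1 are uniform.
define protected amdgpu_kernel void @zero_vs_nonzero(i32 addrspace(1)* %p,
                                                     i32 addrspace(1)* %out) {
  %same = atomicrmw add i32 addrspace(1)* %p, i32 0 monotonic
  %differs = atomicrmw add i32 addrspace(1)* %p, i32 1 monotonic
  %sum = add i32 %same, %differs
  store i32 %sum, i32 addrspace(1)* %out
  ret void
}

The TODO only notes that the zero-operand case could be refined later; returning true for all of these opcodes is the safe default.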