Index: lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -249,6 +249,8 @@ case Intrinsic::r600_read_tidig_x: case Intrinsic::r600_read_tidig_y: case Intrinsic::r600_read_tidig_z: + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: case Intrinsic::amdgcn_image_atomic_swap: case Intrinsic::amdgcn_image_atomic_add: case Intrinsic::amdgcn_image_atomic_sub: @@ -274,6 +276,7 @@ case Intrinsic::amdgcn_buffer_atomic_xor: case Intrinsic::amdgcn_buffer_atomic_cmpswap: case Intrinsic::amdgcn_ps_live: + case Intrinsic::amdgcn_ds_swizzle: return true; } Index: test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll =================================================================== --- test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll +++ test/Analysis/DivergenceAnalysis/AMDGPU/atomics.ll @@ -12,4 +12,34 @@ ret {i32, i1} %orig } +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define i32 @test_atomic_inc_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val) + ret i32 %ret +} + +; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val) +define i64 @test_atomic_inc_i64(i64 addrspace(1)* %ptr, i64 %val) #0 { + %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val) + ret i64 %ret +} + +; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val) +define i32 @test_atomic_dec_i32(i32 addrspace(1)* %ptr, i32 %val) #0 { + %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 %val) + ret i32 %ret +} + +; CHECK: DIVERGENT: %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val) +define i64 @test_atomic_dec_i64(i64 addrspace(1)* %ptr, i64 %val) #0 { + %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 %val) + ret i64 %ret +} + +declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32) #1 +declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64) #1 +declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32) #1 +declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64) #1 + attributes #0 = { nounwind } +attributes #1 = { nounwind argmemonly } Index: test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll =================================================================== --- /dev/null +++ test/Analysis/DivergenceAnalysis/AMDGPU/intrinsics.ll @@ -0,0 +1,13 @@ +; RUN: opt -mtriple=amdgcn-- -analyze -divergence %s | FileCheck %s + +; CHECK: DIVERGENT: %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 +define void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) #0 { + %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0 + store i32 %swizzle, i32 addrspace(1)* %out, align 4 + ret void +} + +declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #1 + +attributes #0 = { nounwind convergent } +attributes #1 = { nounwind readnone convergent }