# Patch summary (unified diff, two files).
#
# 1. lib/Target/AMDGPU/AMDGPUConvertAtomicLibCalls.cpp
#    When NeedCast is set, the generated value NI is now bitcast to
#    F->getReturnType() instead of F->getType().
#    NOTE(review): F is presumably the library function whose call is being
#    replaced, so the cast target becomes that function's return type rather
#    than the type of the function value itself -- confirm against the full
#    source, which is not visible in this hunk.
#
# 2. test/CodeGen/AMDGPU/opencl-1.2-atomics.ll
#    Adds a call to the float overload _Z11atomic_xchgPU3AS3Vff on an
#    addrspace(3) pointer, plus CHECK lines expecting an i32 `atomicrmw xchg`
#    whose captured result [[A]] is then bitcast from i32 to float.
#
# NOTE(review): `dyn_cast(NI)` in the first context line carries no template
# argument; it looks like a `<...>` argument was lost when this patch was
# extracted. Restore it from the real source before applying, otherwise the
# context line will not match.
# NOTE(review): line breaks below follow the hunk headers' line counts
# (7/7 and 15/20). Indentation inside each line is reconstructed and may
# not match the target files exactly -- apply with whitespace-insensitive
# matching (e.g. `patch -l`) or verify against the tree first.
#
Index: lib/Target/AMDGPU/AMDGPUConvertAtomicLibCalls.cpp
===================================================================
--- lib/Target/AMDGPU/AMDGPUConvertAtomicLibCalls.cpp
+++ lib/Target/AMDGPU/AMDGPUConvertAtomicLibCalls.cpp
@@ -542,7 +542,7 @@
       dyn_cast(NI)->setSynchScope(
           (SynchronizationScope)OCL1XAtomicScope.getValue());
       if (NeedCast) {
-        NI = Builder.CreateBitCast(NI, F->getType());
+        NI = Builder.CreateBitCast(NI, F->getReturnType());
       }
     } else if (Type == CMPXCHG) {
       NI = Builder.CreateAtomicCmpXchg(
Index: test/CodeGen/AMDGPU/opencl-1.2-atomics.ll
===================================================================
--- test/CodeGen/AMDGPU/opencl-1.2-atomics.ll
+++ test/CodeGen/AMDGPU/opencl-1.2-atomics.ll
@@ -3,15 +3,20 @@
 ; CHECK: atomicrmw add i32 addrspace(1)* null, i32 0 synchscope(2) monotonic
 ; CHECK: cmpxchg i32 addrspace(1)* null, i32 0, i32 0 synchscope(2) monotonic monotonic
 ; CHECK: atomicrmw xchg i32 addrspace(1)* null, i32 0 synchscope(2) monotonic
+; CHECK: [[A:%[0-9]*]] = atomicrmw xchg i32 addrspace(3)* null, i32 0 synchscope(2) monotonic
+; CHECK: bitcast i32 [[A]] to float
+
 define amdgpu_kernel void @test() {
 entry:
   %call0 = call i32 @_Z8atom_incPU3AS1Vi(i32 addrspace(1)* null)
   %call1 = call i32 @_Z10atomic_addPU3AS1Vii(i32 addrspace(1)* null, i32 0)
   %call2 = call i32 @_Z14atomic_cmpxchgPU3AS1Viii(i32 addrspace(1)* null, i32 0, i32 0)
   %call3 = call i32 @_Z11atomic_xchgPU3AS1Vii(i32 addrspace(1)* null, i32 0)
+  %call4 = call float @_Z11atomic_xchgPU3AS3Vff(float addrspace(3)* null, float 0.0)
   ret void
 }
 declare i32 @_Z8atom_incPU3AS1Vi(i32 addrspace(1)*)
 declare i32 @_Z10atomic_addPU3AS1Vii(i32 addrspace(1)*, i32)
 declare i32 @_Z14atomic_cmpxchgPU3AS1Viii(i32 addrspace(1)*, i32, i32)
 declare i32 @_Z11atomic_xchgPU3AS1Vii(i32 addrspace(1)*, i32)
+declare float @_Z11atomic_xchgPU3AS3Vff(float addrspace(3)*, float)