Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -2974,6 +2974,7 @@ // special case to check if the atomic has only one extract_subreg use, // which itself has no uses. if ((Node->hasNUsesOfValue(1, 0) && + Node->use_begin()->isMachineOpcode() && Node->use_begin()->getMachineOpcode() == AMDGPU::EXTRACT_SUBREG && !Node->use_begin()->hasAnyUseOfValue(0))) { unsigned Def = MI->getOperand(0).getReg(); Index: test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll +++ test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll @@ -100,6 +100,15 @@ ret float %out } +;CHECK-LABEL: {{^}}test4: +;CHECK: buffer_atomic_add v0, +define amdgpu_ps float @test4() { +main_body: + %v = call i32 @llvm.amdgcn.buffer.atomic.add(i32 1, <4 x i32> undef, i32 0, i32 4, i1 false) + %v.float = bitcast i32 %v to float + ret float %v.float +} + declare i32 @llvm.amdgcn.buffer.atomic.swap(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.add(i32, <4 x i32>, i32, i32, i1) #0 declare i32 @llvm.amdgcn.buffer.atomic.sub(i32, <4 x i32>, i32, i32, i1) #0