diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -344,11 +344,6 @@
   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
   assert(DstOpnd && DstOpnd->isReg());
   auto DPPMovReg = DstOpnd->getReg();
-  if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
-    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
-                         " for all uses\n");
-    return false;
-  }
 
   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
   assert(RowMaskOpnd && RowMaskOpnd->isImm());
@@ -426,6 +421,12 @@
     auto &OrigMI = *Use.getParent();
     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
 
+    if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI, &OrigMI)) {
+      LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
+                           " for all uses\n");
+      break;
+    }
+
     auto OrigOp = OrigMI.getOpcode();
     if (TII->isVOP3(OrigOp)) {
       if (!TII->hasVALU32BitEncoding(OrigOp)) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6091,29 +6091,16 @@
   auto *TRI = MRI.getTargetRegisterInfo();
   auto *DefBB = DefMI.getParent();
 
-  if (UseMI) {
-    // Don't bother searching between blocks, although it is possible this block
-    // doesn't modify exec.
-    if (UseMI->getParent() != DefBB)
-      return true;
-  } else {
-    int NumUse = 0;
-    const int MaxUseScan = 10;
-
-    for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
-      if (UseInst.getParent() != DefBB)
-        return true;
-
-      if (NumUse++ > MaxUseScan)
-        return true;
-    }
-  }
+  // Don't bother searching between blocks, although it is possible this block
+  // doesn't modify exec.
+  if (UseMI->getParent() != DefBB)
+    return true;
 
   const int MaxInstScan = 20;
   int NumScan = 0;
 
   // Stop scan at the use if known.
-  auto E = UseMI ? UseMI->getIterator() : DefBB->end();
+  auto E = UseMI->getIterator();
   for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
     if (I->isDebugInstr())
       continue;
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -190,3 +190,17 @@
   store i64 %old, i64 addrspace(1)* %out
   ret void
 }
+
+; Check that the DPP combiner works on the code generated by the atomic optimizer.
+
+; GFX8MORE-LABEL: add_i32_varying_dpp:
+; GFX8MORE: v_add_u32_dpp
+; GFX8MORE: v_add_u32_dpp
+; GFX8MORE: v_add_u32_dpp
+; GFX8MORE: v_add_u32_dpp
+; GFX8MORE: v_add_u32_dpp
+define amdgpu_kernel void @add_i32_varying_dpp() {
+  %lane = call i32 @llvm.amdgcn.workitem.id.x()
+  %old = atomicrmw add i32 addrspace(3)* @local_var32, i32 %lane acq_rel
+  ret void
+}