diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -7093,7 +7093,6 @@ return false; } - bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI, Register VReg, const MachineInstr &DefMI) { @@ -7102,16 +7101,17 @@ auto *TRI = MRI.getTargetRegisterInfo(); auto *DefBB = DefMI.getParent(); - const int MaxUseInstScan = 10; - int NumUseInst = 0; + const int MaxUseScan = 10; + int NumUse = 0; - for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) { + for (auto &Use : MRI.use_nodbg_operands(VReg)) { + auto &UseInst = *Use.getParent(); // Don't bother searching between blocks, although it is possible this block // doesn't modify exec. if (UseInst.getParent() != DefBB) return true; - if (++NumUseInst > MaxUseInstScan) + if (++NumUse > MaxUseScan) return true; } @@ -7120,18 +7120,31 @@ // Stop scan when we have seen all the uses. for (auto I = std::next(DefMI.getIterator()); ; ++I) { + assert(I != DefBB->end()); + if (I->isDebugInstr()) continue; if (++NumInst > MaxInstScan) return true; - if (I->readsRegister(VReg)) - if (--NumUseInst == 0) - return false; + for (unsigned OpI = 0, E = I->getNumOperands(); OpI != E; ++OpI) { + const auto &Op = I->getOperand(OpI); - if (I->modifiesRegister(AMDGPU::EXEC, TRI)) - return true; + if (Op.isRegMask() && Op.clobbersPhysReg(AMDGPU::EXEC)) + return true; + + if (!Op.isReg()) + continue; + + Register Reg = Op.getReg(); + if (Op.isUse()) { + if (Reg == VReg && --NumUse == 0) + return false; + } else // Reg is a def + if (TRI->regsOverlap(Reg, AMDGPU::EXEC)) + return true; + } } }