Index: lib/Target/AMDGPU/GCNDPPCombine.cpp
===================================================================
--- lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -331,7 +331,7 @@
   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
   assert(DstOpnd && DstOpnd->isReg());
   auto DPPMovReg = DstOpnd->getReg();
-  if (!isEXECMaskConstantBetweenDefAndUses(DPPMovReg, *MRI)) {
+  if (execMayBeModifiedBeforeUse(*MRI, DPPMovReg, MovMI)) {
     LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
                          " for all uses\n");
     return false;
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -525,8 +525,10 @@
     // =>
     // %sgpr = S_MOV_B32 imm
     if (FoldingImm) {
-      if (!isEXECMaskConstantBetweenDefAndUses(
-            UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+      if (execMayBeModifiedBeforeUse(*MRI,
+                                     UseMI->getOperand(UseOpIdx).getReg(),
+                                     *OpToFold.getParent(),
+                                     UseMI))
         return;
 
       UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
@@ -536,8 +538,10 @@
     }
 
     if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
-      if (!isEXECMaskConstantBetweenDefAndUses(
-            UseMI->getOperand(UseOpIdx).getReg(), *MRI))
+      if (execMayBeModifiedBeforeUse(*MRI,
+                                     UseMI->getOperand(UseOpIdx).getReg(),
+                                     *OpToFold.getParent(),
+                                     UseMI))
         return;
 
       // %vgpr = COPY %sgpr0
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -972,11 +972,14 @@
 MachineInstr *getVRegSubRegDef(const TargetInstrInfo::RegSubRegPair &P,
                                MachineRegisterInfo &MRI);
 
-/// \brief Return true if EXEC mask isnt' changed between the def and
-/// all uses of VReg. Currently if def and uses are in different BBs -
-/// simply return false. Should be run on SSA.
-bool isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
-                                         const MachineRegisterInfo &MRI);
+/// \brief Return true if EXEC may be modified between the def of \p VReg at
+/// \p DefMI and \p UseMI, or any use of \p VReg when \p UseMI is null. Must be
+/// run on SSA. Conservatively returns true for uses outside the def's block
+/// or when the bounded scan gives up.
+bool execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                unsigned VReg,
+                                const MachineInstr &DefMI,
+                                const MachineInstr *UseMI = nullptr);
 
 namespace AMDGPU {
 
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5933,28 +5933,51 @@
   return nullptr;
 }
 
-bool llvm::isEXECMaskConstantBetweenDefAndUses(unsigned VReg,
-                                               const MachineRegisterInfo &MRI) {
+bool llvm::execMayBeModifiedBeforeUse(const MachineRegisterInfo &MRI,
+                                      unsigned VReg,
+                                      const MachineInstr &DefMI,
+                                      const MachineInstr *UseMI) {
   assert(MRI.isSSA() && "Must be run on SSA");
+  assert(DefMI.definesRegister(VReg) && "wrong def instruction");
+
   auto *TRI = MRI.getTargetRegisterInfo();
+  auto *DefBB = DefMI.getParent();
 
-  auto *DefI = MRI.getVRegDef(VReg);
-  auto *BB = DefI->getParent();
+  if (UseMI) {
+    // Don't bother searching between blocks, although it is possible this block
+    // doesn't modify exec.
+    if (UseMI->getParent() != DefBB)
+      return true;
+  } else {
+    // Give up conservatively once more than MaxUseScan uses have been seen.
+    int NumUse = 0;
+    const int MaxUseScan = 10;
 
-  DenseSet Uses;
-  for (auto &Use : MRI.use_nodbg_operands(VReg)) {
-    auto *I = Use.getParent();
-    if (I->getParent() != BB)
-      return false;
-    Uses.insert(I);
+    for (auto &UseInst : MRI.use_nodbg_instructions(VReg)) {
+      if (UseInst.getParent() != DefBB)
+        return true;
+
+      if (++NumUse > MaxUseScan)
+        return true;
+    }
   }
 
-  auto E = BB->end();
-  for (auto I = std::next(DefI->getIterator()); I != E; ++I) {
-    Uses.erase(&*I);
-    // don't check the last use
-    if (Uses.empty() || I->modifiesRegister(AMDGPU::EXEC, TRI))
-      break;
+  // Likewise inspect at most MaxInstScan non-debug instructions.
+  const int MaxInstScan = 20;
+  int NumScan = 0;
+
+  // Stop scan at the use if known.
+  auto E = UseMI ? UseMI->getIterator() : DefBB->end();
+  for (auto I = std::next(DefMI.getIterator()); I != E; ++I) {
+    if (I->isDebugInstr())
+      continue;
+
+    if (++NumScan > MaxInstScan)
+      return true;
+
+    if (I->modifiesRegister(AMDGPU::EXEC, TRI))
+      return true;
   }
-  return Uses.empty();
+
+  return false;
 }