Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -417,7 +417,35 @@
                             const MachineInstr &MI,
                             const MachineOperand &UseMO) {
   return !UseMO.isUndef() && !TII->isSDWA(MI);
-  //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
+}
+
+/// Return true if folding \p UseMO into \p MI would cross a write to EXEC.
+/// The caller skips such fold candidates.
+///
+/// The instruction owning \p UseMO is taken as the defining instruction. The
+/// fold is reported as crossing an EXEC def only when that instruction lives
+/// in a different block than \p MI, its block cannot fall through, it
+/// implicitly reads EXEC, and the first terminator of its block defines EXEC.
+static bool isFoldingRegisterAcrossExecDef(const MachineInstr &MI,
+                                           const MachineOperand &UseMO) {
+  const MachineInstr *Def = UseMO.getParent();
+  const MachineBasicBlock *DefBB = Def->getParent();
+  if (DefBB == MI.getParent())
+    return false;
+
+  // The const_cast is needed to call canFallThrough() through a const block
+  // pointer.
+  if (const_cast<MachineBasicBlock *>(DefBB)->canFallThrough())
+    return false;
+
+  // getFirstTerminator() yields end() for a block without terminators; do not
+  // dereference it in that case.
+  MachineBasicBlock::const_iterator Term = DefBB->getFirstTerminator();
+  if (Term == DefBB->end())
+    return false;
+
+  return Def->hasRegisterImplicitUseOperand(AMDGPU::EXEC) &&
+         Term->definesRegister(AMDGPU::EXEC);
 }
 
 static bool tryToFoldACImm(const SIInstrInfo *TII,
@@ -1086,6 +1114,9 @@
     Copy->addImplicitDefUseOperands(*MF);
 
   for (FoldCandidate &Fold : FoldList) {
+    if (Fold.isReg() &&
+        isFoldingRegisterAcrossExecDef(*Fold.UseMI, *Fold.OpToFold))
+      continue;
     if (updateOperand(Fold, *TII, *TRI, *ST)) {
       // Clear kill flags.
       if (Fold.isReg()) {