diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -92,7 +92,7 @@
   bool tryFoldCndMask(MachineInstr &MI) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
-  void foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+  bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
 
   const MachineOperand *isClamp(const MachineInstr &MI) const;
   bool tryFoldClamp(MachineInstr &MI);
 
@@ -1217,7 +1217,7 @@
   return false;
 }
 
-void SIFoldOperands::foldInstOperand(MachineInstr &MI,
+bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
                                      MachineOperand &OpToFold) const {
   // We need mutate the operands of new mov instructions to add implicit
   // uses of EXEC, but adding them invalidates the use_iterator, so defer
@@ -1225,6 +1225,7 @@
   SmallVector<MachineInstr *, 4> CopiesToReplace;
   SmallVector<FoldCandidate, 4> FoldList;
   MachineOperand &Dst = MI.getOperand(0);
+  bool Changed = false;
 
   if (OpToFold.isImm()) {
     for (auto &UseMI :
@@ -1237,8 +1238,10 @@
       // We may also encounter cases where one or both operands are
       // immediates materialized into a register, which would ordinarily not
       // be folded due to multiple uses or operand constraints.
-      if (tryConstantFoldOp(*MRI, TII, &UseMI))
+      if (tryConstantFoldOp(*MRI, TII, &UseMI)) {
         LLVM_DEBUG(dbgs() << "Constant folded " << UseMI);
+        Changed = true;
+      }
     }
   }
 
@@ -1297,6 +1300,9 @@
     }
   }
 
+  if (CopiesToReplace.empty() && FoldList.empty())
+    return Changed;
+
   MachineFunction *MF = MI.getParent()->getParent();
   // Make sure we add EXEC uses to any new v_mov instructions created.
   for (MachineInstr *Copy : CopiesToReplace)
@@ -1328,6 +1334,7 @@
       TII->commuteInstruction(*Fold.UseMI, false);
     }
   }
+  return true;
 }
 
 // Clamp patterns are canonically selected to v_max_* instructions, so only
@@ -1751,22 +1758,31 @@
   bool IsIEEEMode = MFI->getMode().IEEE;
   bool HasNSZ = MFI->hasNoSignedZerosFPMath();
 
+  bool Changed = false;
   for (MachineBasicBlock *MBB : depth_first(&MF)) {
     MachineOperand *CurrentKnownM0Val = nullptr;
     for (auto &MI : make_early_inc_range(*MBB)) {
-      tryFoldCndMask(MI);
+      Changed |= tryFoldCndMask(MI);
 
-      if (tryFoldZeroHighBits(MI))
+      if (tryFoldZeroHighBits(MI)) {
+        Changed = true;
         continue;
+      }
 
-      if (MI.isRegSequence() && tryFoldRegSequence(MI))
+      if (MI.isRegSequence() && tryFoldRegSequence(MI)) {
+        Changed = true;
         continue;
+      }
 
-      if (MI.isPHI() && tryFoldLCSSAPhi(MI))
+      if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {
+        Changed = true;
         continue;
+      }
 
-      if (MI.mayLoad() && tryFoldLoad(MI))
+      if (MI.mayLoad() && tryFoldLoad(MI)) {
+        Changed = true;
         continue;
+      }
 
       if (!TII->isFoldableCopy(MI)) {
         // Saw an unknown clobber of m0, so we no longer know what it is.
@@ -1777,7 +1793,7 @@
         // instruction, and not the omod multiply.
         if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
             !tryFoldOMod(MI))
-          tryFoldClamp(MI);
+          Changed |= tryFoldClamp(MI);
 
         continue;
       }
@@ -1788,6 +1804,7 @@
       MachineOperand &NewM0Val = MI.getOperand(1);
       if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
         MI.eraseFromParent();
+        Changed = true;
         continue;
       }
 
@@ -1817,7 +1834,7 @@
       if (!MI.getOperand(0).getReg().isVirtual())
         continue;
 
-      foldInstOperand(MI, OpToFold);
+      Changed |= foldInstOperand(MI, OpToFold);
 
       // If we managed to fold all uses of this copy then we might as well
      // delete it now.
@@ -1829,6 +1846,7 @@
        auto &SrcOp = InstToErase->getOperand(1);
        auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
        InstToErase->eraseFromParent();
+        Changed = true;
        InstToErase = nullptr;
        if (!SrcReg || SrcReg.isPhysical())
          break;
@@ -1837,9 +1855,11 @@
          break;
      }
      if (InstToErase && InstToErase->isRegSequence() &&
-         MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg()))
+         MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
        InstToErase->eraseFromParent();
+        Changed = true;
+      }
    }
  }
-  return true;
+  return Changed;
}
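
For context: runOnMachineFunction is expected to return true only when the pass
actually modified the function, and the unconditional "return true;" removed at
the end of this diff misreported that. The patch therefore threads a Changed
flag through every fold helper and accumulates it. A minimal standalone sketch
of the same pattern follows; the tryFoldX/tryFoldY helpers are hypothetical
stand-ins for the real folds, not LLVM API:

    #include <iostream>
    #include <vector>

    // Each helper reports whether it actually rewrote anything, like
    // tryFoldCndMask / tryFoldClamp / foldInstOperand after this change.
    static bool tryFoldX(int &V) {
      if (V % 2) { ++V; return true; }     // "fold" odd values
      return false;
    }
    static bool tryFoldY(int &V) {
      if (V > 10) { V = 10; return true; } // "fold" large values
      return false;
    }

    // Driver in the shape of runOnMachineFunction: accumulate with |=,
    // set the flag on early-continue paths, and return the aggregate
    // instead of an unconditional true.
    static bool runOnAll(std::vector<int> &Vals) {
      bool Changed = false;
      for (int &V : Vals) {
        Changed |= tryFoldX(V); // keep scanning, but remember the result
        if (tryFoldY(V)) {
          Changed = true;       // early-continue paths must set the flag too
          continue;
        }
      }
      return Changed;           // previously the buggy "return true;"
    }

    int main() {
      std::vector<int> A{2, 4, 6}, B{3, 42};
      std::cout << std::boolalpha << runOnAll(A) << '\n'; // false: no folds fired
      std::cout << runOnAll(B) << '\n';                   // true: both helpers folded
    }

The CopiesToReplace.empty() && FoldList.empty() early-out added to
foldInstOperand serves the same contract: when there is nothing left to apply,
the function reports only what tryConstantFoldOp already changed rather than
claiming a modification unconditionally.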