diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -205,6 +205,8 @@ MachineRegisterInfo *MRI = nullptr; AliasAnalysis *AA = nullptr; bool OptimizeAgain; + bool MergeableInvalid; + std::set MIMovedList; static bool dmasksCanBeCombined(const CombineInfo &CI, const SIInstrInfo &TII, @@ -269,6 +271,8 @@ bool collectMergeableInsts(MachineBasicBlock &MBB, std::list > &MergeableInsts) const; + bool isCleanMergeList(std::list &MergeList) const; + public: static char ID; @@ -278,7 +282,8 @@ bool optimizeInstsWithSameBaseAddr(std::list &MergeList, bool &OptimizeListAgain); - bool optimizeBlock(std::list > &MergeableInsts); + bool optimizeBlock(std::list > &MergeableInsts, + MachineBasicBlock &MBB); bool runOnMachineFunction(MachineFunction &MF) override; @@ -583,12 +588,14 @@ } static void moveInstsAfter(MachineBasicBlock::iterator I, - ArrayRef InstsToMove) { + ArrayRef InstsToMove, + std::set &MovedList) { MachineBasicBlock *MBB = I->getParent(); ++I; for (MachineInstr *MI : InstsToMove) { MI->removeFromParent(); MBB->insert(I, MI); + MovedList.insert(MI); } } @@ -1056,7 +1063,7 @@ .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1140,7 +1147,7 @@ .addImm(0) // gds .cloneMergedMemRefs({&*CI.I, &*Paired.I}); - moveInstsAfter(Write2, InstsToMove); + moveInstsAfter(Write2, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1196,7 +1203,7 @@ .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1247,7 +1254,7 @@ .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1310,7 +1317,7 @@ .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1378,7 +1385,7 @@ .add(*Dest1) .addReg(DestReg, RegState::Kill, SubRegIdx1); - moveInstsAfter(Copy1, InstsToMove); + moveInstsAfter(Copy1, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1442,7 +1449,7 @@ .addMemOperand( combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); - moveInstsAfter(MIB, InstsToMove); + moveInstsAfter(MIB, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -1601,7 +1608,7 @@ .addImm(0) // swz .addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb)); - moveInstsAfter(MIB, InstsToMove); + moveInstsAfter(MIB, InstsToMove, MIMovedList); CI.I->eraseFromParent(); Paired.I->eraseFromParent(); @@ -2001,17 +2008,32 @@ return Modified; } +bool SILoadStoreOptimizer::isCleanMergeList(std::list &MergeList) const { + for (auto I : MergeList) { + MachineInstr* MI = &*(I.I); + if (MIMovedList.count(MI)) + return false; + } + return true; +} + // Scan through looking for adjacent LDS operations with constant offsets from // the same base register. We rely on the scheduler to do the hard work of // clustering nearby loads, and assume these are all adjacent. bool SILoadStoreOptimizer::optimizeBlock( - std::list > &MergeableInsts) { + std::list > &MergeableInsts, + MachineBasicBlock &MBB) { bool Modified = false; for (std::list>::iterator I = MergeableInsts.begin(), E = MergeableInsts.end(); I != E;) { std::list &MergeList = *I; + if (!isCleanMergeList(MergeList)) { + MergeableInvalid = true; + return Modified; + } + bool OptimizeListAgain = false; if (!optimizeInstsWithSameBaseAddr(MergeList, OptimizeListAgain)) { // We weren't able to make any changes, so delete the list so we don't @@ -2152,15 +2174,23 @@ bool Modified = false; - for (MachineBasicBlock &MBB : MF) { - std::list > MergeableInsts; // First pass: Collect list of all instructions we know how to merge. - Modified |= collectMergeableInsts(MBB, MergeableInsts); do { - OptimizeAgain = false; - Modified |= optimizeBlock(MergeableInsts); - } while (OptimizeAgain); + std::list > MergeableInsts; + Modified |= collectMergeableInsts(MBB, MergeableInsts); + MergeableInvalid = false; + MIMovedList.clear(); + + do { + OptimizeAgain = false; + Modified |= optimizeBlock(MergeableInsts, MBB); + } while (OptimizeAgain && !MergeableInvalid); + + // Has a transformation occurred that invalidates the MergeableInsts list? + // This can happen if other mergeable instructions are moved when merging + // a different list + } while(MergeableInvalid); } return Modified;