Index: include/llvm/CodeGen/Passes.h =================================================================== --- include/llvm/CodeGen/Passes.h +++ include/llvm/CodeGen/Passes.h @@ -442,6 +442,9 @@ /// basic blocks. extern char &SpillPlacementID; + /// Split critical edges for loops pass. + extern char &SplitCritEdgesID; + /// ShrinkWrap pass. Look for the best place to insert save and restore // instruction and update the MachineFunctionInfo with that information. extern char &ShrinkWrapID; Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -249,6 +249,7 @@ void initializeSCEVAAWrapperPassPass(PassRegistry&); void initializeScalarEvolutionWrapperPassPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry &); +void initializeSplitCritEdgesPass(PassRegistry &); void initializeSimpleInlinerPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeRegisterCoalescerPass(PassRegistry&); Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -132,6 +132,7 @@ UnreachableBlockElim.cpp VirtRegMap.cpp WinEHPrepare.cpp + SplitCritEdges.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/CodeGen Index: lib/CodeGen/CodeGen.cpp =================================================================== --- lib/CodeGen/CodeGen.cpp +++ lib/CodeGen/CodeGen.cpp @@ -65,6 +65,7 @@ initializeRegisterCoalescerPass(Registry); initializeShrinkWrapPass(Registry); initializeSlotIndexesPass(Registry); + initializeSplitCritEdgesPass(Registry); initializeStackColoringPass(Registry); initializeStackMapLivenessPass(Registry); initializeLiveDebugValuesPass(Registry); Index: lib/CodeGen/Passes.cpp =================================================================== --- lib/CodeGen/Passes.cpp +++ lib/CodeGen/Passes.cpp @@ -74,6 +74,9 @@ cl::desc("Disable Copy Propagation pass")); static cl::opt DisablePartialLibcallInlining("disable-partial-libcall-inlining", cl::Hidden, cl::desc("Disable Partial Libcall Inlining")); +static cl::opt + DisableSCEdge("disable-scedge", cl::Hidden, + cl::desc("Disable Split Critical Edge for loops pass")); static cl::opt EnableImplicitNullChecks( "enable-implicit-null-checks", cl::desc("Fold null checks into faulting memory operations"), @@ -545,6 +548,9 @@ // Run pre-ra passes. addPreRegAlloc(); + if (!DisableSCEdge) + addPass(&SplitCritEdgesID); + // Run register allocation and passes that are tightly coupled with it, // including phi elimination and scheduling. if (getOptimizeRegAlloc()) Index: lib/CodeGen/RegAllocGreedy.cpp =================================================================== --- lib/CodeGen/RegAllocGreedy.cpp +++ lib/CodeGen/RegAllocGreedy.cpp @@ -62,7 +62,7 @@ clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), clEnumValEnd), - cl::init(SplitEditor::SM_Partition)); + cl::init(SplitEditor::SM_Size)); static cl::opt LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, @@ -157,6 +157,9 @@ /// Attempt live range splitting if assignment is impossible. RS_Split, + /// Attempt live range splitting around loops. + RS_LoopSplit, + /// Attempt more aggressive live range splitting that is guaranteed to make /// progress. This is used for split products that may not be making /// progress. @@ -165,7 +168,6 @@ /// Live range will be spilled. No more splitting will be attempted. RS_Spill, - /// Live range is in memory. Because of other evictions, it might get moved /// in a register in the end. RS_Memory, @@ -312,6 +314,13 @@ /// Set of broken hints that may be reconciled later because of eviction. SmallSetVector SetOfBrokenHints; + /// Set of Loops for which all the vregs crossing its entry or exit have been + /// splitted. + SmallPtrSet SplittedLoops; + + /// Set of LiveInterval which should be removed from Queue. + SmallPtrSet RemovedFromQ; + public: RAGreedy(); @@ -360,11 +369,17 @@ bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters); + void splitAllAroundLoop(MachineLoop *Loop, + SmallVectorImpl &NewVRegs); unsigned tryAssign(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryEvict(LiveInterval&, AllocationOrder&, SmallVectorImpl&, unsigned = ~0u); + bool trySplitRegAroundLoop(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs); + bool trySplitLoopInAll(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); /// Calculate cost of region splitting. @@ -422,14 +437,8 @@ #ifndef NDEBUG const char *const RAGreedy::StageName[] = { - "RS_New", - "RS_Assign", - "RS_Split", - "RS_Split2", - "RS_Spill", - "RS_Memory", - "RS_Done" -}; + "RS_New", "RS_Assign", "RS_Split", "RS_LoopSplit", + "RS_Split2", "RS_Spill", "RS_Memory", "RS_Done"}; #endif // Hysteresis to use when comparing floats. @@ -600,11 +609,13 @@ LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { - if (CurQueue.empty()) - return nullptr; - LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); - CurQueue.pop(); - return LI; + while (!CurQueue.empty()) { + LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); + CurQueue.pop(); + if (!RemovedFromQ.count(LI)) + return LI; + } + return nullptr; } @@ -1342,6 +1353,244 @@ MF->verify(this, "After splitting live range around region"); } +/// isHotLoop - the loop is regarded as hot if the freq of its header is much +/// larger than its preheader. +static bool isHotLoop(MachineLoop *Loop, MachineBlockFrequencyInfo *MBFI) { + MachineBasicBlock *Preheader = Loop->getLoopPreheader(); + if (!Preheader) + return false; + MachineBasicBlock *Header = Loop->getHeader(); + unsigned Ratio = MBFI->getBlockFreq(Header).getFrequency() / + MBFI->getBlockFreq(Preheader).getFrequency(); + return Ratio >= 10; +} + +/// noCriticalEdge - The loop has no critical edges if the entry and exit edges +/// are not critical edges. +static bool noCriticalEdge(MachineLoop *Loop) { + MachineBasicBlock *Preheader = Loop->getLoopPreheader(); + if (!Preheader) + return false; + + SmallVector ExitBlocks; + SmallPtrSet UniqExitBlocks; + Loop->getExitBlocks(ExitBlocks); + UniqExitBlocks.insert(ExitBlocks.begin(), ExitBlocks.end()); + for (auto ExitBlock : UniqExitBlocks) { + if (ExitBlock->pred_size() > 1) + return false; + } + return true; +} + +/// crossLoopBoundary - Literally, if the VirtReg is livein at the entry of +/// an exit block or liveout at the exit of the preheader block, we say it +/// crosses the loop boundary. +/// However, a VirtReg after being splitted for a loop still cross the loop +/// boundary. To avoid recursively splitting the same VirtReg, the +/// concept of crossLoopBoundary is extended a little bit: If the VirtReg +/// lives through the entry of an exit block or lives through the exit of +/// the preheader block, we say it crosses the loop boundary. +static bool crossLoopBoundary(LiveInterval &VirtReg, MachineLoop *Loop, + LiveIntervals *LIS) { + // VirtReg crosses loop entry boundary if it lives through the loop + // preheader block. + bool Cross = LIS->isLiveInToMBB(VirtReg, Loop->getLoopPreheader()) && + LIS->isLiveOutOfMBB(VirtReg, Loop->getLoopPreheader()); + + // VirtReg crosses loop entry boundary if it lives through any loop + // exit block. + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + for (auto &ExitBlock : ExitBlocks) + Cross = Cross || (LIS->isLiveOutOfMBB(VirtReg, ExitBlock) && + LIS->isLiveInToMBB(VirtReg, ExitBlock)); + return Cross; +} + +/// trySplitLoopInAll - For the innermost hot loop covering the live range of +/// VirtReg, Split all the virtregs which live across the loop boundary. +/// Covering here means VirtReg is totally inside the loop, or its live range +/// starts at the loop Preheader and ends inside the loop, or its live range +/// starts inside the loop and ends at the loop ExitBlock. +/// The fact that VirtReg didn't get a Physical reg means the loop covering +/// it has high register pressure. Splitting all the crossing virtregs at the +/// loop boundary helps the best register allocation for virtreg references +/// inside the loop. +bool RAGreedy::trySplitLoopInAll(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs) { + MachineLoop *CandLoop = nullptr; + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + const MachineBasicBlock *MBB = BI.MBB; + // If MBB is not inside any loop, skip it. + if (!Loops->getLoopDepth(MBB)) + continue; + + // Find the loop which the live range of VirtReg is inside or just + // covers. + MachineLoop *Loop = Loops->getLoopFor(MBB); + while (Loop) { + if (noCriticalEdge(Loop) && isHotLoop(Loop, MBFI) && + !crossLoopBoundary(VirtReg, Loop, LIS)) + break; + Loop = Loop->getParentLoop(); + } + if (!SplittedLoops.count(Loop)) { + CandLoop = Loop; + break; + } + } + + if (!CandLoop) + return false; + +#ifndef NDEBUG + // Assume there is no separate live range outside CandLoop. + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + const MachineBasicBlock *MBB = BI.MBB; + if (CandLoop && CandLoop->contains(MBB)) + continue; + if (MBB == CandLoop->getLoopPreheader()) + continue; + SmallVector ExitBlocks; + CandLoop->getExitBlocks(ExitBlocks); + bool found = false; + for (auto &ExitBlock : ExitBlocks) { + if (MBB == ExitBlock) { + found = true; + break; + } + } + assert(found && "Separate live ranges from the same vreg"); + } +#endif + + DEBUG(dbgs() << "Split all vregs for " << *CandLoop); + splitAllAroundLoop(CandLoop, NewVRegs); + return true; +} + +/// trySplitRegAroundLoop - VirtReg didn't get a Physical reg assigned before +/// splitting. If it lives across a loop and it has reference inside the loop, +/// find the outermost such loop and split the VirtReg around it. So the +/// new VirtReg with shorter live range just around the loop will have better +/// chance to get a Physical register, and its reference inside the loop +/// may not need to be spilled. +/// Another benefit of splitting VirtReg around loop is if the new VirtReg +/// around the loop still cannot get Physical register, it indicates the loop +/// has high enough register pressure, so we may go further step to split all +/// the virtregs acrossing the loop's boundary by calling splitAllAroundLoop +/// in the next round of selectOrSplit. +bool RAGreedy::trySplitRegAroundLoop(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs) { + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + // Here we cannot use SplitSpillMode other than SM_Partition mode because + // hoisting backcopies can keep generating new VReg living across loop and + // cause infinite loop. + SE->reset(LREdit, SplitEditor::SM_Partition); + + SmallPtrSet CandLoopSet; + // Search each reference of VirtReg inside a hot loop. + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + const MachineBasicBlock *MBB = BI.MBB; + // If MBB is not inside any loop, skip it. + if (!Loops->getLoopDepth(MBB)) + continue; + // If MBB is already inside a CandLoop, don't try to find candidate + // loop for it. + bool InCandLoop = false; + for (auto CandLoop : CandLoopSet) + if (CandLoop->contains(MBB)) + InCandLoop = true; + if (InCandLoop) + continue; + + // Search the outermost hot loop which VirtReg lives across. + MachineLoop *CandLoop = nullptr; + MachineLoop *Loop = Loops->getLoopFor(MBB); + while (Loop) { + if (noCriticalEdge(Loop) && isHotLoop(Loop, MBFI) && + crossLoopBoundary(VirtReg, Loop, LIS)) + CandLoop = Loop; + Loop = Loop->getParentLoop(); + } + if (CandLoop) + CandLoopSet.insert(CandLoop); + } + + if (CandLoopSet.empty()) + return false; + + for (auto CandLoop : CandLoopSet) { + DEBUG(dbgs() << "Split " << PrintReg(VirtReg.reg, TRI) << " for " + << *CandLoop); + SE->splitAroundLoop(VirtReg, CandLoop, true); + } + SmallVector IntvMap; + SE->finish(&IntvMap); + return true; +} + +/// splitAllAroundLoop - Split all the virtregs crossing loop at the loop +/// boundary, including those already got Physical register assigned. +void RAGreedy::splitAllAroundLoop(MachineLoop *Loop, + SmallVectorImpl &NewVRegs) { + SmallVector LSNewVRegs; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + if (crossLoopBoundary(LI, Loop, LIS)) { + DEBUG(dbgs() << PrintReg(Reg, TRI)); + // For those VirtRegs already been assigned Physical register, unassigned + // the original VirtReg, split it and assign the same Physical register to + // the new VirtRegs. + // For those VirtRegs not been assigned yet, they need to be removed from + // Queue after the splitting. + unsigned PhysReg = 0; + bool hasPhys = VRM->hasPhys(Reg); + if (hasPhys) { + PhysReg = VRM->getPhys(Reg); + DEBUG(dbgs() << " with [" << PrintReg(PhysReg, TRI) << "] is splitted"); + Matrix->unassign(LI); + } else { + DEBUG(dbgs() << " is splitted:\n"); + } + + LSNewVRegs.clear(); + LiveRangeEdit LREdit(&LI, LSNewVRegs, *MF, *LIS, VRM, this); + // For the splitting of VirtReg with physical register assigned, cannot use + // SplitSpillMode other than SM_Partition mode because other mode may hoist + // backcopies and produce overlapped live ranges. + // For the splitting of VirtReg not being assigned, still cannot use + // SplitSpillMode other than SM_Partition mode because hoisting backcopies + // can keep generating new VReg living across loop and cause infinite loop. + SE->reset(LREdit, SplitEditor::SM_Partition); + SE->splitAroundLoop(LI, Loop, !hasPhys); + + SmallVector IntvMap; + SE->finish(&IntvMap); + for (auto NewVReg : LSNewVRegs) { + if (hasPhys) + Matrix->assign(LIS->getInterval(NewVReg), PhysReg); + else { + // Remove the original VirtReg from Queue. + RemovedFromQ.insert(&LI); + NewVRegs.push_back(NewVReg); + } + } + } + } + SplittedLoops.insert(Loop); +} + unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { unsigned NumCands = 0; @@ -1970,6 +2219,32 @@ return PhysReg; } + // Loop split is to try to let references inside hot loops get their best + // chance to get physical register assigned. It achieves that by splitting + // virtregs at the loop boundary so the live ranges outside loop will not + // affect the coloring of live ranges inside loop. + // + // This is achieved in two steps. For a VirtReg with a long live range + // across a loop and there is reference of it inside the loop, call + // trySplitRegAroundLoop to split the VirtReg at the loop boundary. In the + // next round of selectOrSplit, if the new VirtReg after splitting around + // the loop gets physical register, it is a good result. If the new VirtReg + // still cannot get physical register, it indicates precisely the loop has + // high register pressure, and trySplitLoopInAll will be called to split + // all the other virtregs living across the loop and make the loop a + // relatively independent register allocation region. + if (getStage(VirtReg) < RS_LoopSplit) { + bool LoopSplitted = trySplitLoopInAll(VirtReg, NewVRegs); + bool RegSplitted = trySplitRegAroundLoop(VirtReg, NewVRegs); + if (LoopSplitted || RegSplitted) { + if (LoopSplitted && !RegSplitted) { + NewVRegs.push_back(VirtReg.reg); + setStage(VirtReg, RS_LoopSplit); + } + return 0; + } + } + // First try to split around a region spanning multiple blocks. RS_Split2 // ranges already made dubious progress with region splitting, so they go // straight to single block splitting. @@ -2598,6 +2873,7 @@ IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); GlobalCand.resize(32); // This will grow as needed. SetOfBrokenHints.clear(); + RemovedFromQ.clear(); allocatePhysRegs(); tryHintsRecoloring(); Index: lib/CodeGen/SplitCritEdges.cpp =================================================================== --- lib/CodeGen/SplitCritEdges.cpp +++ lib/CodeGen/SplitCritEdges.cpp @@ -0,0 +1,222 @@ +//=== SplitCritEdges.cpp - Split critical edges for loop entry and exit ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs critical edges splitting for edges at the loop entry or +// exits. It also creates a loop preheader if the loop header has multiple +// predecessors. It is necessary to enable register splitting around loops, +// and it is also helpful for better register coalescing and shrinkwrapping. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "critedges-split" + +using namespace llvm; + +namespace { +class SplitCritEdges : public MachineFunctionPass { + MachineDominatorTree *MDT; + MachineLoopInfo *MLI; + +public: + static char ID; + + SplitCritEdges() : MachineFunctionPass(ID) { + initializeSplitCritEdgesPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Split Critical Edges for RegAlloc"; + } + bool createLoopPreheader(MachineFunction &MF, MachineLoop *Loop); + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. + +char SplitCritEdges::ID = 0; +char &llvm::SplitCritEdgesID = SplitCritEdges::ID; + +INITIALIZE_PASS_BEGIN(SplitCritEdges, "critedges-split", + "Critical Edges Split Pass", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SplitCritEdges, "critedges-split", + "Critical Edges Split Pass", false, false) + +// If the header of the Loop has multiple predecessors outside of the loop, +// a new Loop preheader will be inserted and it will become the only predecessor +// of the header block outside of the loop. +bool SplitCritEdges::createLoopPreheader(MachineFunction &MF, + MachineLoop *Loop) { + MachineBasicBlock *Header = Loop->getHeader(); + MachineDomTreeNode *IDom = MDT->getNode(Header)->getIDom(); + + // Predecessors of Header inside of loop. + SmallPtrSet PredsIn; + // Predecessors of Header outside of loop. + SmallPtrSet PredsOut; + MachineBasicBlock *TBB, *FBB; + SmallVector Cond; + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + for (auto Pred : Header->predecessors()) { + TBB = nullptr; + FBB = nullptr; + Cond.clear(); + if (TII->AnalyzeBranch(*Pred, TBB, FBB, Cond)) + return false; + if (!Loop->contains(Pred)) + PredsOut.insert(Pred); + else + PredsIn.insert(Pred); + } + + MachineBasicBlock *NMBB = MF.CreateMachineBasicBlock(); + MF.insert(MachineFunction::iterator(Header), NMBB); + + // Update CFG and terminator for predecessor blocks outside loop. + for (auto Pred : PredsOut) { + Pred->ReplaceUsesOfBlockWith(Header, NMBB); + Pred->updateTerminator(); + } + // Update terminator for predecessor blocks inside loop. + for (auto Pred : PredsIn) + Pred->updateTerminator(); + // Update CFG and terminator for the new block. + DebugLoc DL; + NMBB->addSuccessor(Header); + if (!NMBB->isLayoutSuccessor(Header)) { + Cond.clear(); + TII->InsertBranch(*NMBB, Header, nullptr, Cond, DL); + } + + // Update PHI nodes: + // Original PHI node in Header looks like: + // r1_0 = PHI(r1_1, PB_in, r1_2, PB_out1, r1_3, PB_out2) + // PB_in is the Predecessor Block inside loop. PB_outX is the Xth + // Predecessor Block outside loop. + // After the update, a new PHI is generated in NMBB: + // r1_4 = PHI(r1_2, PB_out1, r1_3, PB_out2) + // And the old PHI in Header Block is changed to: + // r1_0 = PHI(r1_1, PB_in, r1_4, NMBB) + MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineBasicBlock::instr_iterator I = Header->instr_begin(), + E = Header->instr_end(); + I != E && I->isPHI(); ++I) { + MachineInstr *OldPHI = &*I; + MachineInstr *NewPHI = TII->duplicate(OldPHI, MF); + MachineOperand *MO = &NewPHI->getOperand(0); + assert(MO->isDef() && "NewPHI->Operand(0) should be a Def"); + unsigned Reg = MO->getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(Reg); + unsigned NewReg = MRI.createVirtualRegister(RC); + MO->setReg(NewReg); + // Remove operands of which the source BB is not in PredsOut from NewPHI. + for (unsigned i = NewPHI->getNumOperands() - 1; i >= 2; i -= 2) { + MO = &NewPHI->getOperand(i); + if (!PredsOut.count(MO->getMBB())) { + NewPHI->RemoveOperand(i); + NewPHI->RemoveOperand(i - 1); + } + } + // Remove operands of which the source BB is in PredsOut from OldPHI. + for (unsigned i = OldPHI->getNumOperands() - 1; i >= 2; i -= 2) { + MO = &OldPHI->getOperand(i); + if (PredsOut.count(MO->getMBB())) { + OldPHI->RemoveOperand(i); + OldPHI->RemoveOperand(i - 1); + } + } + MachineInstrBuilder MIB(MF, OldPHI); + MIB.addReg(NewReg).addMBB(NMBB); + NMBB->insert(NMBB->SkipPHIsAndLabels(NMBB->begin()), NewPHI); + } + + // Update MDT. NMBB becomes the new immediate dominator node of Header. + MDT->addNewBlock(NMBB, IDom->getBlock()); + MDT->changeImmediateDominator(Header, NMBB); + + // Update MLI. + if (MachineLoop *P = Loop->getParentLoop()) + P->addBasicBlockToLoop(NMBB, MLI->getBase()); + return true; +} + +bool SplitCritEdges::runOnMachineFunction(MachineFunction &MF) { + MDT = &getAnalysis(); + MLI = &getAnalysis(); + + SmallVector Worklist(MLI->begin(), MLI->end()); + while (!Worklist.empty()) { + MachineLoop *CurLoop = Worklist.pop_back_val(); + + MachineBasicBlock *Preheader = CurLoop->getLoopPreheader(); + if (!Preheader) { + MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); + if (Pred && Pred->SplitCriticalEdge(CurLoop->getHeader(), this)) { + DEBUG(dbgs() << "Split critical entry edge for " << *CurLoop); + DEBUG(dbgs() << "BB#" << Pred->getNumber() << " --> " + << CurLoop->getHeader()->getNumber() << "\n"); + } else if (createLoopPreheader(MF, CurLoop)) { + DEBUG(dbgs() << "Create a new loop header for " << *CurLoop); + DEBUG(dbgs() << "new preheader BB#" + << CurLoop->getLoopPreheader()->getNumber() << "\n"); + } + } + + SmallVector ExitBlocks; + SmallPtrSet UniqExitBlocks; + SmallPtrSet PredBlocks; + CurLoop->getExitBlocks(ExitBlocks); + UniqExitBlocks.insert(ExitBlocks.begin(), ExitBlocks.end()); + for (auto ExitBlock : UniqExitBlocks) { + bool ToSplit = false; + for (auto PredBB : ExitBlock->predecessors()) { + if (!CurLoop->contains(PredBB)) { + ToSplit = true; + break; + } + } + if (ToSplit) { + // When splitting critical edges, ExitBlock->predecessors() will + // be changed, so save ExitBlock->predecessors() in PredBlocks + // before doing any splitting. + PredBlocks.insert(ExitBlock->predecessors().begin(), + ExitBlock->predecessors().end()); + for (auto PredBB : PredBlocks) { + if (CurLoop->contains(PredBB) && + PredBB->SplitCriticalEdge(ExitBlock, this)) { + DEBUG(dbgs() << "Split critical exit edge for " << *CurLoop); + DEBUG(dbgs() << "BB#" << PredBB->getNumber() << " --> " + << ExitBlock->getNumber() << "\n"); + } + } + PredBlocks.clear(); + } + } + + Worklist.append(CurLoop->begin(), CurLoop->end()); + } + return false; +} Index: lib/CodeGen/SplitKit.h =================================================================== --- lib/CodeGen/SplitKit.h +++ lib/CodeGen/SplitKit.h @@ -29,6 +29,7 @@ class LiveRangeEdit; class MachineBlockFrequencyInfo; class MachineInstr; +class MachineLoop; class MachineLoopInfo; class MachineRegisterInfo; class TargetInstrInfo; @@ -367,6 +368,9 @@ /// selectIntv - Select a previously opened interval index. void selectIntv(unsigned Idx); + /// splitAroundLoop - Split VirtReg around the loop. + void splitAroundLoop(LiveInterval &VirtReg, MachineLoop *Loop, bool Tight); + /// enterIntvBefore - Enter the open interval before the instruction at Idx. /// If the parent interval is not live before Idx, a COPY is not inserted. /// Return the beginning of the new live range. @@ -399,6 +403,7 @@ /// Add liveness from the MBB top to the copy. /// Return the end of the live range. SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB); + SlotIndex leaveIntvAtEnd(MachineBasicBlock &MBB); /// overlapIntv - Indicate that all instructions in range should use the open /// interval, but also let the complement interval be live. Index: lib/CodeGen/SplitKit.cpp =================================================================== --- lib/CodeGen/SplitKit.cpp +++ lib/CodeGen/SplitKit.cpp @@ -466,6 +466,92 @@ OpenIdx = Idx; } +/// Split the VirtReg in the Preheader Block or ExitBlock of the loop. +/// +/// If Tight is true, VirtReg will be splitted at the bottom of the Preheader +/// Block or at the top of the ExitBlock. If Tight is false, VirtReg will be +/// splitted at the top of the Preheader Block or at the bottom of the +/// ExitBlock. If the VirtReg to be splitted hasn't been assigned physical +/// register yet, we want to split it tightly because the new VirtReg after +/// split will has less interference with other VirtRegs inside Preheader or +/// ExitBlock. If the VirtReg to be splitted here has physical register +/// assigned before the splitting, we want to split it loosely because if +/// the new VirtReg after split is evicted, the physical register can be used +/// by other new VirtRegs also generated by loop split and tightly around loop. +void SplitEditor::splitAroundLoop(LiveInterval &VirtReg, MachineLoop *Loop, + bool Tight) { + openIntv(); + // Add copy to Preheader if VirtReg is live at the entry of Preheader. + // The copy will be from the remainder interval to the new interval. + MachineBasicBlock *Preheader = Loop->getLoopPreheader(); + if (LIS.isLiveInToMBB(VirtReg, Preheader) && + LIS.isLiveOutOfMBB(VirtReg, Preheader)) { + if (Tight) { + // Split the loop tightly, .i.e, split at the bottom of preheader BB. + MachineBasicBlock::const_iterator FirstTerm = + Preheader->getFirstTerminator(); + MachineBasicBlock::const_iterator EndIter = Preheader->end(); + if (FirstTerm != EndIter) { + SlotIndex Idx = enterIntvBefore(LIS.getInstructionIndex(FirstTerm)); + SlotIndex Stop = LIS.getSlotIndexes()->getMBBEndIdx(Preheader); + useIntv(Idx, Stop); + } else { + enterIntvAtEnd(*Preheader); + } + } else { + // Split the loop loosely, .i.e, split at the top of preheader BB. + MachineBasicBlock::iterator FirstIt = + (*Preheader->SkipPHIsAndLabels(Preheader->begin())); + if (FirstIt != Preheader->end()) { + SlotIndex FirstIdx = LIS.getInstructionIndex(FirstIt); + SlotIndex Idx = enterIntvBefore(FirstIdx); + SlotIndex Stop = LIS.getSlotIndexes()->getMBBEndIdx(Preheader); + useIntv(Idx, Stop); + } else { + enterIntvAtEnd(*Preheader); + } + } + } else if (LIS.isLiveOutOfMBB(VirtReg, Preheader)) { + useIntv(*Preheader); + } + + // Add copy to ExitBlock if VirtReg is live at the exit of the ExitBlock. + // The copy will be from the new interval to the remainder interval. + SmallVector ExitBlocks; + Loop->getExitBlocks(ExitBlocks); + SmallPtrSet UniqExitBlocks; + UniqExitBlocks.insert(ExitBlocks.begin(), ExitBlocks.end()); + for (MachineBasicBlock *ExitBlock : UniqExitBlocks) { + if (LIS.isLiveOutOfMBB(VirtReg, ExitBlock) && + LIS.isLiveInToMBB(VirtReg, ExitBlock)) { + if (Tight) { + // Split the loop tightly at the exit, .i.e, split at the top of + // ExitBlock. + leaveIntvAtTop(*ExitBlock); + } else { + // Split the loop loosely at the exit, .i.e, split at the bottom of + // ExitBlock. + MachineBasicBlock::const_iterator FirstTerm = + ExitBlock->getFirstTerminator(); + MachineBasicBlock::const_iterator EndIter = ExitBlock->end(); + if (FirstTerm != EndIter) { + SlotIndex Idx = leaveIntvBefore(LIS.getInstructionIndex(FirstTerm)); + SlotIndex Start = LIS.getSlotIndexes()->getMBBStartIdx(ExitBlock); + useIntv(Start, Idx); + } else { + leaveIntvAtEnd(*ExitBlock); + } + } + } else if (LIS.isLiveInToMBB(VirtReg, ExitBlock)) { + useIntv(*ExitBlock); + } + } + + // For the Block inside loop, use the new interval. + for (MachineBasicBlock *LoopBB : Loop->getBlocks()) + useIntv(*LoopBB); +} + SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvBefore"); DEBUG(dbgs() << " enterIntvBefore " << Idx); @@ -599,6 +685,24 @@ return VNI->def; } +SlotIndex SplitEditor::leaveIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtEnd"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + SlotIndex Last = LIS.getMBBEndIdx(&MBB).getPrevSlot(); + DEBUG(dbgs() << " leaveIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Last; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = + defFromParent(0, ParentVNI, Last, MBB, SA.getLastSplitPointIter(&MBB)); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start);