Index: llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/trunk/include/llvm/Transforms/Utils/UnrollLoop.h @@ -56,7 +56,7 @@ TargetTransformInfo::UnrollingPreferences &UP); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, - DominatorTree *DT, bool PreserveLCSSA); + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name); } Index: llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnroll.cpp @@ -51,6 +51,16 @@ cl::desc("Allow runtime unrolled loops to be unrolled " "with epilog instead of prolog.")); +static cl::opt +UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden, + cl::desc("Verify domtree after unrolling"), +#ifdef NDEBUG + cl::init(false) +#else + cl::init(true) +#endif + ); + /// Convert the instruction operands from referencing the current values into /// those specified by VMap. static inline void remapInstruction(Instruction *I, @@ -327,7 +337,7 @@ "and peeling for the same loop"); if (PeelCount) - peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA); + peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); // Loops containing convergent instructions must have a count that divides // their TripMultiple. @@ -612,14 +622,11 @@ Term->eraseFromParent(); } } + // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied - // iterations too. Thus, the new idom of the block will be the nearest - // common dominator of the previous idom and common dominator of all copies of - // the previous idom. This is equivalent to the nearest common dominator of - // the previous idom and the first latch, which dominates all copies of the - // previous idom. + // iterations too. if (DT && Count > 1) { for (auto *BB : OriginalLoopBlocks) { auto *BBDomNode = DT->getNode(BB); @@ -629,12 +636,38 @@ if (!L->contains(ChildBB)) ChildrenToUpdate.push_back(ChildBB); } - BasicBlock *NewIDom = DT->findNearestCommonDominator(BB, Latches[0]); + BasicBlock *NewIDom; + if (BB == LatchBlock) { + // The latch is special because we emit unconditional branches in + // some cases where the original loop contained a conditional branch. + // Since the latch is always at the bottom of the loop, if the latch + // dominated an exit before unrolling, the new dominator of that exit + // must also be a latch. Specifically, the dominator is the first + // latch which ends in a conditional branch, or the last latch if + // there is no such latch. + NewIDom = Latches.back(); + for (BasicBlock *IterLatch : Latches) { + TerminatorInst *Term = IterLatch->getTerminator(); + if (isa(Term) && cast(Term)->isConditional()) { + NewIDom = IterLatch; + break; + } + } + } else { + // The new idom of the block will be the nearest common dominator + // of all copies of the previous idom. This is equivalent to the + // nearest common dominator of the previous idom and the first latch, + // which dominates all copies of the previous idom. + NewIDom = DT->findNearestCommonDominator(BB, LatchBlock); + } for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } } + if (DT && UnrollVerifyDomtree) + DT->verifyDomTree(); + // Merge adjacent basic blocks, if possible. SmallPtrSet ForgottenLoops; for (BasicBlock *Latch : Latches) { @@ -652,13 +685,6 @@ } } - // FIXME: We only preserve DT info for complete unrolling now. Incrementally - // updating domtree after partial loop unrolling should also be easy. - if (DT && !CompletelyUnroll) - DT->recalculate(*L->getHeader()->getParent()); - else if (DT) - DEBUG(DT->verifyDomTree()); - // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { SmallVector DeadInsts; @@ -718,8 +744,6 @@ // at least one layer outside of the loop that was unrolled so that any // changes to the parent loop exposed by the unrolling are considered. if (DT) { - if (!OuterL && !CompletelyUnroll) - OuterL = L; if (OuterL) { // OuterL includes all loops for which we can break loop-simplify, so // it's sufficient to simplify only it (it'll recursively simplify inner Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -28,6 +28,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -257,7 +258,7 @@ /// optimizations. bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - bool PreserveLCSSA) { + AssumptionCache *AC, bool PreserveLCSSA) { if (!canPeel(L)) return false; @@ -404,10 +405,20 @@ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); } + // FIXME: Incrementally update domtree. + DT->recalculate(*L->getHeader()->getParent()); + // If the loop is nested, we changed the parent loop, update SE. - if (Loop *ParentLoop = L->getParentLoop()) + if (Loop *ParentLoop = L->getParentLoop()) { SE->forgetLoop(ParentLoop); + // FIXME: Incrementally update loop-simplify + simplifyLoop(ParentLoop, DT, LI, SE, AC, PreserveLCSSA); + } else { + // FIXME: Incrementally update loop-simplify + simplifyLoop(L, DT, LI, SE, AC, PreserveLCSSA); + } + NumPeeled++; return true; Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -146,6 +146,8 @@ // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -260,13 +262,20 @@ IRBuilder<> B(InsertPt); Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); assert(Exit && "Loop must have a single exit block only"); - // Split the exit to maintain loop canonicalization guarantees + // Split the epilogue exit to maintain loop canonicalization guarantees SmallVector Preds(predecessors(Exit)); SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, PreserveLCSSA); // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector NewExitPreds{Latch}; + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, + PreserveLCSSA); } /// Create a clone of the blocks in a loop and connect them together. @@ -284,7 +293,7 @@ BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { + DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -321,6 +330,17 @@ InsertTop->getTerminator()->setSuccessor(0, NewBB); } + if (DT) { + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to unrolled loop. + BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } + } + if (Latch == *BB) { // For the last block, if CreateRemainderLoop is false, create a direct // jump to InsertBot. If not, create a loop back to cloned head. @@ -594,6 +614,12 @@ // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(PrologExit, PreHeader); + } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -618,7 +644,7 @@ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(),