Index: include/llvm/Support/Debug.h =================================================================== --- include/llvm/Support/Debug.h +++ include/llvm/Support/Debug.h @@ -33,11 +33,6 @@ class raw_ostream; #ifndef NDEBUG -/// DebugFlag - This boolean is set to true if the '-debug' command line option -/// is specified. This should probably not be referenced directly, instead, use -/// the DEBUG macro below. -/// -extern bool DebugFlag; /// isCurrentDebugType - Return true if the specified string is the debug type /// specified on the command line, or if none was specified on the command line @@ -77,6 +72,29 @@ #define DEBUG_WITH_TYPE(TYPE, X) do { } while (false) #endif +/// This boolean is set to true if the '-debug' command line option +/// is specified. This should probably not be referenced directly, instead, use +/// the DEBUG macro below. +/// +extern bool DebugFlag; + +/// \name Verification flags. +/// +/// These flags turns on/off that are expensive and are turned off by default, +/// unless macro EXPENSIVE_CHECKS is defined. The flags allow selectively +/// turning the checks on without need to recompile. +/// \{ + +/// Enables verification of dominator trees. +/// +extern bool VerifyDomInfo; + +/// Enables verification of loop info. +/// +extern bool VerifyLoopInfo; + +///\} + /// EnableDebugBuffering - This defaults to false. If true, the debug /// stream will install signal handlers to dump any buffered debug /// output. It allows clients to selectively allow the debug stream Index: lib/Analysis/LoopInfo.cpp =================================================================== --- lib/Analysis/LoopInfo.cpp +++ lib/Analysis/LoopInfo.cpp @@ -40,9 +40,9 @@ // Always verify loopinfo if expensive checking is enabled. #ifdef EXPENSIVE_CHECKS -static bool VerifyLoopInfo = true; +bool llvm::VerifyLoopInfo = true; #else -static bool VerifyLoopInfo = false; +bool llvm::VerifyLoopInfo = false; #endif static cl::opt VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo), Index: lib/IR/Dominators.cpp =================================================================== --- lib/IR/Dominators.cpp +++ lib/IR/Dominators.cpp @@ -29,9 +29,9 @@ // Always verify dominfo if expensive checking is enabled. #ifdef EXPENSIVE_CHECKS -static bool VerifyDomInfo = true; +bool llvm::VerifyDomInfo = true; #else -static bool VerifyDomInfo = false; +bool llvm::VerifyDomInfo = false; #endif static cl::opt VerifyDomInfoX("verify-dom-info", cl::location(VerifyDomInfo), Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -612,6 +612,7 @@ Term->eraseFromParent(); } } + // Update dominators of blocks we might reach through exits. // Immediate dominator of such block might change, because we add more // routes which can lead to the exit: we can now reach it from the copied @@ -633,6 +634,11 @@ for (auto *ChildBB : ChildrenToUpdate) DT->changeImmediateDominator(ChildBB, NewIDom); } + // If runtime prolog is created, its conditional branch jumps around the + // block inserted for keeping LCSSA form. So the only predecessor of + // LoopExit is the last latch block. + if (!UnrollRuntimeEpilog && RuntimeTripCount) + DT->changeImmediateDominator(LoopExit, Latches.back()); } // Merge adjacent basic blocks, if possible. @@ -652,13 +658,6 @@ } } - // FIXME: We only preserve DT info for complete unrolling now. Incrementally - // updating domtree after partial loop unrolling should also be easy. - if (DT && !CompletelyUnroll) - DT->recalculate(*L->getHeader()->getParent()); - else if (DT) - DEBUG(DT->verifyDomTree()); - // Simplify any new induction variables in the partially unrolled loop. if (SE && !CompletelyUnroll && Count > 1) { SmallVector DeadInsts; @@ -746,6 +745,12 @@ for (Loop *SubLoop : LoopsToSimplify) simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA); } +#ifndef NDEBUG + if (VerifyDomInfo) + DT->verifyDomTree(); + if (VerifyLoopInfo) + LI->verify(*DT); +#endif } return true; Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -164,7 +164,8 @@ BasicBlock *InsertBot, BasicBlock *Exit, SmallVectorImpl &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - ValueToValueMapTy &LVMap, LoopInfo *LI) { + ValueToValueMapTy &LVMap, DominatorTree *DT, + LoopInfo *LI) { BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -185,6 +186,17 @@ ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; + + // If dominator tree is available, insert nodes to represent cloned blocks. + if (DT) { + if (Header == *BB) + DT->addNewBlock(NewBB, InsertTop); + else { + DomTreeNode *IDom = DT->getNode(*BB)->getIDom(); + DT->addNewBlock(NewBB, + cast(VMap[cast(IDom->getBlock())])); + } + } } // Hook-up the control flow for the newly inserted blocks. @@ -198,11 +210,22 @@ // The backedge now goes to the "bottom", which is either the loop's real // header (for the last peeled iteration) or the copied header of the next // iteration (for every other iteration) - BranchInst *LatchBR = - cast(cast(VMap[Latch])->getTerminator()); + BasicBlock *NewLatch = cast(VMap[Latch]); + BranchInst *LatchBR = cast(NewLatch->getTerminator()); unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); LatchBR->setSuccessor(HeaderIdx, InsertBot); LatchBR->setSuccessor(1 - HeaderIdx, Exit); + if (DT) { + DT->changeImmediateDominator(InsertBot, NewLatch); + BasicBlock *CurIDom = DT->getNode(Exit)->getIDom()->getBlock(); + BasicBlock *NewIDom = DT->findNearestCommonDominator(CurIDom, NewLatch); + DT->changeImmediateDominator(Exit, NewIDom); + } + +#ifndef NDEBUG + if (DT && VerifyDomInfo) + DT->verifyDomTree(); +#endif // The new copy of the loop body starts with a bunch of PHI nodes // that pick an incoming value from either the preheader, or the previous @@ -358,7 +381,7 @@ CurHeaderWeight = 1; cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit, - NewBlocks, LoopBlocks, VMap, LVMap, LI); + NewBlocks, LoopBlocks, VMap, LVMap, DT, LI); updateBranchWeights(InsertBot, cast(VMap[LatchBR]), Iter, PeelCount, ExitWeight); Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -146,6 +146,8 @@ // Add the branch to the exit block (around the unrolled loop) B.CreateCondBr(BrLoopExit, Exit, NewPreHeader); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, PrologExit); } /// Connect the unrolling epilog code to the original loop. @@ -267,6 +269,8 @@ // Add the branch to the exit block (around the unrolling loop) B.CreateCondBr(BrLoopExit, EpilogPreHeader, Exit); InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); } /// Create a clone of the blocks in a loop and connect them together. @@ -284,7 +288,7 @@ BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - LoopInfo *LI) { + DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -345,6 +349,20 @@ } LatchBR->eraseFromParent(); } + + // If dominator tree is available, insert nodes to represent cloned blocks. + // Note, that the dominator tree remains in inconsistent state, because + // the cloned block terminators points to original blocks. The consistency + // will be restored after instruction remap is made. + if (DT) { + if (Header == *BB) + DT->addNewBlock(NewBB, InsertTop); + else { + DomTreeNode *IDom = DT->getNode(*BB)->getIDom(); + DT->addNewBlock(NewBB, + cast(VMap[cast(IDom->getBlock())])); + } + } } // Change the incoming values to the ones defined in the preheader or @@ -371,6 +389,7 @@ NewPHI->setIncomingValue(idx, V); } } + if (NewLoop) { // Add unroll disable metadata to disable future unrolling for this loop. SmallVector MDs; @@ -594,6 +613,16 @@ // Branch to either remainder (extra iterations) loop or unrolling loop. B.CreateCondBr(BranchVal, RemainderLoop, UnrollingLoop); PreHeaderBR->eraseFromParent(); + if (DT) { + if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, PreHeader); + else + DT->changeImmediateDominator(UnrollingLoop, PreHeader); +#ifndef NDEBUG + if (VerifyDomInfo) + DT->verifyDomTree(); +#endif + } Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -618,7 +647,7 @@ BasicBlock *InsertBot = UseEpilogRemainder ? Exit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, LI); + InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -683,6 +712,15 @@ VMap, DT, LI, PreserveLCSSA); } +#ifndef NDEBUG + if (DT) { + if (VerifyDomInfo) + DT->verifyDomTree(); + if (VerifyLoopInfo) + LI->verify(*DT); + } +#endif + // If this loop is nested, then the loop unroller changes the code in the // parent loop, so the Scalar Evolution pass needs to be run again. if (Loop *ParentLoop = L->getParentLoop()) Index: test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll =================================================================== --- test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll +++ test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unroll -unroll-count=3 -S | grep bb72.2 +; RUN: opt < %s -loop-unroll -unroll-count=3 -verify-dom-info -verify-loop-info -S | grep bb72.2 define void @foo(i32 %trips) { entry: Index: test/Transforms/LoopUnroll/peel-loop.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop.ll +++ test/Transforms/LoopUnroll/peel-loop.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -simplifycfg -instcombine | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -verify-loop-info -simplifycfg -instcombine | FileCheck %s ; Basic loop peeling - check that we can peel-off the first 3 loop iterations ; when explicitly requested. Index: test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop.ll +++ test/Transforms/LoopUnroll/runtime-loop.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefix=EPILOG -; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG +; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -verify-dom-info -verify-loop-info | FileCheck %s -check-prefix=EPILOG +; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -verify-dom-info -verify-loop-info | FileCheck %s -check-prefix=PROLOG target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: test/Transforms/LoopUnroll/runtime-loop3.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop3.ll +++ test/Transforms/LoopUnroll/runtime-loop3.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -info-output-file - | FileCheck %s --check-prefix=STATS +; RUN: opt < %s -disable-output -stats -loop-unroll -unroll-runtime -unroll-threshold=400 -verify-dom-info -verify-loop-info -info-output-file - | FileCheck %s --check-prefix=STATS ; Test that nested loops can be unrolled. We need to increase threshold to do it