Index: include/llvm/Analysis/LoopInfo.h =================================================================== --- include/llvm/Analysis/LoopInfo.h +++ include/llvm/Analysis/LoopInfo.h @@ -362,6 +362,16 @@ /// invariant. bool hasLoopInvariantOperands(const Instruction *I) const; + // Return true if the specified value is either loop invariant or defined in + // the given subloop. + bool isLoopInvariantOutsideSubLoop(const Value *V, + const Loop *Sub) const; + + // Return true if all the operands of the specified instruction are loop + // invariant or defined in the given subloop. + bool hasLoopInvariantOperandsOutsideSubLoop(const Instruction *I, + const Loop *Sub) const; + /// If the given value is an instruction inside of the loop and it can be /// hoisted, do so to make it trivially loop-invariant. /// Return true if the value after any hoisting is loop invariant. This Index: include/llvm/Analysis/LoopPass.h =================================================================== --- include/llvm/Analysis/LoopPass.h +++ include/llvm/Analysis/LoopPass.h @@ -131,6 +131,7 @@ // Add a new loop into the loop queue as a child of the given parent, or at // the top level if \c ParentLoop is null. Loop &addLoop(Loop *ParentLoop); + void addExistingLoop(Loop *L, Loop *ParentLoop); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -35,6 +35,7 @@ class ScalarEvolution; class SCEV; class TargetLibraryInfo; +class LPPassManager; /// \brief Captures loop safety information. /// It keep information for loop & its header may throw exception. @@ -364,7 +365,7 @@ /// It returns changed status. bool sinkRegion(DomTreeNode *, AliasAnalysis *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, Loop *, AliasSetTracker *, - LICMSafetyInfo *); + LICMSafetyInfo *, LPPassManager *); /// \brief Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth Index: lib/Analysis/LoopInfo.cpp =================================================================== --- lib/Analysis/LoopInfo.cpp +++ lib/Analysis/LoopInfo.cpp @@ -61,6 +61,23 @@ return all_of(I->operands(), [this](Value *V) { return isLoopInvariant(V); }); } +bool Loop::isLoopInvariantOutsideSubLoop(const Value *V, + const Loop *Sub) const { + if (const Instruction *I = dyn_cast(V)) { + // It's defined in the subloop or outside the outer loop. + return Sub->contains(I) || !contains(I); + } + return true; +} + +bool Loop::hasLoopInvariantOperandsOutsideSubLoop(const Instruction *I, + const Loop *Sub) const { + return all_of(I->operands(), + [this, Sub](Value *V) { + return isLoopInvariantOutsideSubLoop(V, Sub); + }); +} + bool Loop::makeLoopInvariant(Value *V, bool &Changed, Instruction *InsertPt) const { if (Instruction *I = dyn_cast(V)) Index: lib/Analysis/LoopPass.cpp =================================================================== --- lib/Analysis/LoopPass.cpp +++ lib/Analysis/LoopPass.cpp @@ -68,16 +68,13 @@ } // Inset loop into loop nest (LoopInfo) and loop queue (LQ). -Loop &LPPassManager::addLoop(Loop *ParentLoop) { - // Create a new loop. LI will take ownership. - Loop *L = new Loop(); - +void LPPassManager::addExistingLoop(Loop *L, Loop *ParentLoop) { // Insert into the loop nest and the loop queue. if (!ParentLoop) { // This is the top level loop. LI->addTopLevelLoop(L); LQ.push_front(L); - return *L; + return; } ParentLoop->addChildLoop(L); @@ -90,6 +87,12 @@ break; } } +} + +Loop &LPPassManager::addLoop(Loop *ParentLoop) { + // Create a new loop. LI will take ownership. + Loop *L = new Loop(); + addExistingLoop(L, ParentLoop); return *L; } Index: lib/Transforms/Scalar/LICM.cpp =================================================================== --- lib/Transforms/Scalar/LICM.cpp +++ lib/Transforms/Scalar/LICM.cpp @@ -105,6 +105,7 @@ DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo); +static bool isTriviallyReplacablePHI(const PHINode &PN, const Instruction &I); namespace { struct LICM : public LoopPass { @@ -119,7 +120,6 @@ /// loop preheaders be inserted into the CFG... /// void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -210,7 +210,7 @@ // if (L->hasDedicatedExits()) Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, - CurAST, &SafetyInfo); + CurAST, &SafetyInfo, &LPM); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, CurLoop, CurAST, &SafetyInfo); @@ -264,6 +264,261 @@ return Changed; } +// Safety checks for subloop movement, shared between hoisting and sinking. +// These conditions are necessary, but *not* sufficient to allow moving +// a subloop. +static bool canSinkOrHoistSubLoop(const Loop *SubLoop, const DominatorTree *DT, + const Loop *CurLoop, + const LICMSafetyInfo *SafetyInfo) { + // Only move loops one level at a time. + if (SubLoop->getParentLoop() != CurLoop) + return false; + + // Subloop being moved must have a single exit and entry block. + if (!SubLoop->getLoopPredecessor() || !SubLoop->getExitBlock() || + !SubLoop->getExitingBlock()) { + DEBUG(dbgs() << "Not hoisting/sinking subloop: " + << SubLoop->getHeader()->getName() + << ": Missing exit block, exiting block, or predecessor."); + return false; + } + + if (!SubLoop->hasDedicatedExits()) { + DEBUG(dbgs() << "Not hoisting/sinking subloop: " + << SubLoop->getHeader()->getName() << ": Exit block " + << SubLoop->getExitBlock()->getName() + << " has out-of-loop predecessor.\n"); + return false; + } + + if (!isGuaranteedToExecute(*SubLoop->getHeader()->begin(), DT, CurLoop, + SafetyInfo)) { + DEBUG(dbgs() << "Not hoisting/sinking subloop " + << SubLoop->getHeader()->getName() << " to " + << CurLoop->getHeader()->getName() + << ": not guaranteed to execute.\n"); + return false; + } + return true; +} + +static bool canSinkSubLoop(Loop *SubLoop, AliasAnalysis *AA, DominatorTree *DT, + TargetLibraryInfo *TLI, Loop *CurLoop, + AliasSetTracker *CurAST, + LICMSafetyInfo *SafetyInfo) { + if (!canSinkOrHoistSubLoop(SubLoop, DT, CurLoop, SafetyInfo)) + return false; + + assert(CurLoop->hasDedicatedExits()); // Checked in runOnLoop(). + + // Make sure the outer loop has a single exit block we can sink to. + if (!CurLoop->getExitBlock()) { + DEBUG(dbgs() << "Not sinking subloop: " << SubLoop->getHeader()->getName() + << ": Outer loop " << CurLoop->getHeader()->getName() + << " has no dedicated exit block.\n"); + return false; + } + + auto ExitBlock = SubLoop->getExitBlock(); + assert(ExitBlock); + // Find any LCSSA phi nodes and check they're not used inside the loop. + PHINode *PN; + for (auto II = ExitBlock->begin(); (PN = dyn_cast(II)); ++II) { + if (PN->getNumOperands() == 1 && + SubLoop->contains(PN->getIncomingBlock(0))) { + if (!isNotUsedInLoop(*PN, CurLoop, SafetyInfo)) { + DEBUG(dbgs() << "Not sinking subloop: " + << SubLoop->getHeader()->getName() + << ": Inner loop value used in outer loop.\n"); + return false; + } + } + } + + // Check we can sink each instruction inside the loop. + for (BasicBlock *BB : SubLoop->blocks()) { + for (Instruction &I : *BB) { + if (!isa(&I) && !isa(&I) && + !canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) { + DEBUG(dbgs() << "Not sinking subloop: " + << SubLoop->getHeader()->getName() << ": Can't sink " << I + << "\n"); + return false; + } + } + } + + return true; +} + +// Update all phi nodes for which the incoming block is no longer a predecessor +// of BB to refer to New. +static void replaceInvalidPHIOperandsWith(BasicBlock *BB, BasicBlock *New) { + PHINode *PN; + for (auto II = BB->begin(); (PN = dyn_cast(II)); ++II) { + for (unsigned Idx = 0; Idx < PN->getNumOperands(); ++Idx) { + BasicBlock *Incoming = PN->getIncomingBlock(Idx); + if (std::find(pred_begin(BB), pred_end(BB), Incoming) == pred_end(BB)) + PN->setIncomingBlock(Idx, New); + } + } +} + +static void removeSubLoop(Loop *SubLoop, DominatorTree *DT) { + // Remove from parent's subloop list. + Loop *Parent = SubLoop->getParentLoop(); + assert(Parent && "Subloop has no parent"); + auto &SubLoops = Parent->getSubLoops(); + BasicBlock *SubLoopHeader = SubLoop->getHeader(); + Loop::iterator it = std::find(SubLoops.begin(), SubLoops.end(), SubLoop); + assert(it != SubLoops.end() && *it == SubLoop); + SubLoop = Parent->removeChildLoop(it); + + auto OldExitBlock = SubLoop->getExitBlock(); + assert(OldExitBlock); + + // Create a new exit block, which will be hoisted along with the loop to + // preserve LCSSA. + BasicBlock *NewExitBlock = + BasicBlock::Create(SubLoopHeader->getContext(), + SubLoopHeader->getName() + ".licm_exit", + SubLoopHeader->getParent()); + DT->addNewBlock(NewExitBlock, SubLoop->getExitingBlock()); + + TerminatorInst *DummyTerm = + new UnreachableInst(NewExitBlock->getContext(), NewExitBlock); + + // Find any LCSSA phi nodes and add them to the new exit block. + std::vector PhisToMove; + PHINode *PN; + for (auto II = OldExitBlock->begin(); (PN = dyn_cast(II)); ++II) { + if (PN->getNumOperands() == 1 && + SubLoop->contains(PN->getIncomingBlock(0))) { + auto Inst = dyn_cast(PN->getIncomingValue(0)); + if (Inst && SubLoop->contains(Inst)) + PhisToMove.push_back(PN); + } + } + for (auto PN : PhisToMove) + PN->moveBefore(DummyTerm); + + // Point the subloop's predecessor at its exit block. + BasicBlock *SubLoopPredecessor = SubLoop->getLoopPredecessor(); + assert(SubLoopPredecessor); + SubLoopPredecessor->getTerminator()->replaceUsesOfWith(SubLoopHeader, + OldExitBlock); + + auto ExitingBlock = SubLoop->getExitingBlock(); + for (auto II = OldExitBlock->begin(); isa(II); ++II) + II->replaceUsesOfWith(ExitingBlock, SubLoopPredecessor); + + // Branch from the subloop's exiting block to the new exit block. + for (auto BBI = succ_begin(ExitingBlock), BBE = succ_end(ExitingBlock); + BBI != BBE; ++BBI) { + if (!SubLoop->contains(*BBI)) { + ExitingBlock->getTerminator()->replaceUsesOfWith(*BBI, NewExitBlock); + } + } + + for (auto *BB : SubLoop->blocks()) + Parent->removeBlockFromLoop(BB); + + // We never need to free a subloop's AST here - because we only ever move + // *sub*loops, the AST is always freed in collectAliasInfoForLoop(), even if + // we're moving this subloop to level 0. + + // The old exit block is now dominated by the subloop's old predecessor. + DT->changeImmediateDominator(OldExitBlock, SubLoopPredecessor); +} + +// Insert the subloop after the phis in the outer loop's exit block. +static void sinkSubLoopToExit(Loop *SubLoop, LoopInfo *LI, DominatorTree *DT, + Loop *CurLoop, LPPassManager *LPM) { + BasicBlock *SubLoopHeader = SubLoop->getHeader(); + BasicBlock *ExitBlock = CurLoop->getExitBlock(); + BasicBlock *SubLoopExitBlock = SubLoop->getExitBlock(); + assert(SubLoopHeader && ExitBlock && SubLoopExitBlock); + + DEBUG(dbgs() << "LICM sinking to " << ExitBlock->getName() << ": "); + DEBUG(SubLoop->dump()); + + // Split the outer loop's exit block into phi and non-phi parts. + BasicBlock *AfterSubLoop = ExitBlock->splitBasicBlock( + ExitBlock->getFirstNonPHI(), ExitBlock->getName() + ".licm_post_subloop"); + DT->addNewBlock(AfterSubLoop, SubLoopExitBlock); + + // Like in hoisting, fix up invalid phis in the subloop header which used to + // refer to the loop's predecessors inside the outer loop. + replaceInvalidPHIOperandsWith(SubLoopHeader, ExitBlock); + + // The outer loop's exit block may contain LCSSA phi nodes referring to + // values defined in the inner loop. But because we've moved the inner loop + // to *after* these nodes, they are invalid. RAUW with the *inner* loop's + // LCSSA node. + std::vector InstructionsToErase; + PHINode *PN; + for (auto II = ExitBlock->begin(); (PN = dyn_cast(II)); ++II) { + auto Incoming = dyn_cast(PN->getIncomingValue(0)); + if (Incoming && PN->getNumOperands() > 0 && + isTriviallyReplacablePHI(*PN, *Incoming)) { + if (Incoming->getParent() == SubLoopExitBlock) { + PN->replaceAllUsesWith(Incoming); + InstructionsToErase.push_back(PN); + } + } + } + for (auto I : InstructionsToErase) + I->eraseFromParent(); + + // Branch from the sunk subloop's exit block to the non-phi part of the + // original exit block. + ExitBlock->getTerminator()->moveBefore(SubLoopExitBlock->getTerminator()); + SubLoopExitBlock->getTerminator()->eraseFromParent(); + + // Branch from the outer loop's exit block to the sunk subloop. + BranchInst::Create(SubLoopHeader, ExitBlock); + + // The sunk subloop may use values which were defined in the outer loop. To + // preserve LCSSA, find these and add phis in the outer loop's exit block. + SmallVector ExitingBlocks; + CurLoop->getExitingBlocks(ExitingBlocks); + for (BasicBlock *BB : SubLoop->blocks()) { + for (Instruction &I : *BB) { + for (unsigned Idx = 0; Idx < I.getNumOperands(); ++Idx) { + auto Op = dyn_cast(I.getOperand(Idx)); + if (Op && CurLoop->contains(Op->getParent())) { + auto PN = + PHINode::Create(Op->getType(), 0, Op->getName() + ".licm_lcssa", + &*ExitBlock->begin()); + for (BasicBlock *ExitingBlock : ExitingBlocks) { + PN->addIncoming(Op, ExitingBlock); + } + I.setOperand(Idx, PN); + } + } + } + } + + Loop *ParentLoop = CurLoop->getParentLoop(); + if (ParentLoop) { + ParentLoop->addBasicBlockToLoop(SubLoopExitBlock, *LI); + ParentLoop->addBasicBlockToLoop(AfterSubLoop, *LI); + } + + assert(LPM); + LPM->addExistingLoop(SubLoop, CurLoop->getParentLoop()); + + // The old outer exit block's children are dominated by the new exit block. + DomTreeNode *OldExitNode = (*DT)[ExitBlock]; + const std::vector Children(OldExitNode->getChildren().begin(), + OldExitNode->getChildren().end()); + for (DomTreeNode *Child : Children) + DT->changeImmediateDominator(Child, (*DT)[AfterSubLoop]); + + DT->changeImmediateDominator(SubLoopHeader, ExitBlock); + DT->changeImmediateDominator(AfterSubLoop, SubLoopExitBlock); +} + /// Walk the specified region of the CFG (defined by all blocks dominated by /// the specified block, and that are in the current loop) in reverse depth /// first order w.r.t the DominatorTree. This allows us to visit uses before @@ -271,7 +526,8 @@ /// bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, Loop *CurLoop, - AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo) { + AliasSetTracker *CurAST, LICMSafetyInfo *SafetyInfo, + LPPassManager *LPM) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && @@ -285,14 +541,34 @@ // We are processing blocks in reverse dfo, so process children first. bool Changed = false; - const std::vector &Children = N->getChildren(); + const std::vector Children(N->getChildren().begin(), + N->getChildren().end()); for (DomTreeNode *Child : Children) - Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo); + Changed |= + sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo, LPM); // Only need to process the contents of this block if it is not part of a - // subloop (which would already have been processed). - if (inSubLoop(BB, CurLoop, LI)) + // subloop (which would already have been processed). However, we might be + // able to sink the entire subloop. + if (inSubLoop(BB, CurLoop, LI)) { + Loop *SubLoop = LI->getLoopFor(BB); + + // Make sure this is the dominating block. + if (SubLoop->getHeader() == BB) { + if (canSinkSubLoop(SubLoop, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) { + removeSubLoop(SubLoop, DT); + sinkSubLoopToExit(SubLoop, LI, DT, CurLoop, LPM); +#ifndef NDEBUG + SubLoop->verifyLoop(); + CurLoop->verifyLoop(); + DT->verifyDomTree(); + LI->verify(); +#endif + Changed = true; + } + } return Changed; + } for (BasicBlock::iterator II = BB->end(); II != BB->begin();) { Instruction &I = *--II; Index: test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll =================================================================== --- test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll +++ test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll @@ -2,7 +2,7 @@ ; PR11882: ComputeLoadConstantCompareExitLimit crash. ; ; for.body is deleted leaving a loop-invariant load. -; CHECK-NOT: for.body +; CHECK-NOT: for.body: target datalayout = "e-p:64:64:64-n32:64" @func_21_l_773 = external global i32, align 4 Index: test/Transforms/LICM/inner-loop-dont-sink.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/inner-loop-dont-sink.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -licm | FileCheck %s + +define void @main() #0 { +; CHECK-LABEL: @main( +entry: +; CHECK: entry: +; CHECK-NEXT: br label %outer.header + br label %outer.header + +outer.header: +; CHECK: outer.header: +; CHECK: br i1 undef, label %if.then, label %outer.latch + br i1 undef, label %if.then, label %outer.latch + +if.then: +; CHECK: if.then: + %call = tail call i32 @generate() +; CHECK: br label %inner.body + br label %inner.body + +inner.body: +; CHECK: inner.body: + %arrayctor.done = icmp eq i32 undef, %call +; CHECK: br i1 %arrayctor.done, label %outer.latch.loopexit, label %inner.body + br i1 %arrayctor.done, label %outer.latch, label %inner.body + +outer.latch: + br i1 undef, label %outer.exit, label %outer.header + +outer.exit: + ret void +} + +declare i32 @generate() #1 Index: test/Transforms/LICM/inner-loop-sink-multiple-phi.ll =================================================================== --- /dev/null +++ test/Transforms/LICM/inner-loop-sink-multiple-phi.ll @@ -0,0 +1,47 @@ +; RUN: opt < %s -S -licm -loop-unswitch | FileCheck %s + +define void @main() #0 { +; CHECK-LABEL: @main( +entry: +; CHECK: entry: +; CHECK: br label %outer.header + br label %outer.header + +outer.header: + br label %inner.body + +inner.body: +; CHECK: inner.body: +; CHECK-NEXT: %y = phi i32 [ 0, %outer.exit ], [ %add1, %inner.body ] +; CHECK-NEXT: %z = phi i32 [ 0, %outer.exit ], [ %add2, %inner.body ] + %y = phi i32 [ 0, %outer.header ], [ %add1, %inner.body ] + %z = phi i32 [ 0, %outer.header ], [ %add2, %inner.body ] + %add1 = add i32 %y, 7 + %add2 = add i32 %z, 11 +; CHECK: br i1 false, label %inner.body, label %inner.body.licm_exit + br i1 undef, label %inner.body, label %outer.inc + +outer.inc: +; CHECK: outer.inc: +; CHECK-NEXT: br i1 false, label %outer.header, label %outer.exit + %y.lcssa = phi i32 [ %add1, %inner.body ] + %z.lcssa = phi i32 [ %add2, %inner.body ] + br i1 undef, label %outer.header, label %outer.exit + +outer.exit: +; CHECK: outer.exit: +; CHECK-NEXT: br label %inner.body + %y.0.lcssa = phi i32 [ %y.lcssa, %outer.inc ] + %z.0.lcssa = phi i32 [ %z.lcssa, %outer.inc ] +; CHECK: outer.exit.licm_post_subloop: +; CHECK-NEXT: br label %return + br label %return + +return: + ret void + +; CHECK: inner.body.licm_exit: +; CHECK-NEXT: %y.lcssa = phi i32 [ %add1, %inner.body ] +; CHECK-NEXT: %z.lcssa = phi i32 [ %add2, %inner.body ] +; CHECK-NEXT: br label %outer.exit.licm_post_subloop +}