Index: llvm/include/llvm/Analysis/LoopInfo.h =================================================================== --- llvm/include/llvm/Analysis/LoopInfo.h +++ llvm/include/llvm/Analysis/LoopInfo.h @@ -330,6 +330,8 @@ void verifyLoopNest(DenseSet *Loops) const; void print(raw_ostream &OS, unsigned Depth = 0) const; + /// Print loop with all the BBs inside it. + void printVerbose(raw_ostream &OS, unsigned Depth = 0) const; protected: friend class LoopInfoBase; @@ -451,6 +453,7 @@ BasicBlock *getUniqueExitBlock() const; void dump() const; + void dumpVerbose() const; /// Return the debug location of the start of this loop. /// This looks for a BB terminating instruction with a known debug Index: llvm/include/llvm/Analysis/LoopInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/LoopInfoImpl.h +++ llvm/include/llvm/Analysis/LoopInfoImpl.h @@ -252,6 +252,8 @@ HasInsideLoopSuccs = true; break; } + assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); + typedef GraphTraits > InvBlockTraits; for (typename InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); @@ -262,6 +264,7 @@ else OutsideLoopPreds.push_back(N); } + assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); if (BB == getHeader()) { assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); @@ -275,8 +278,6 @@ assert(CB != OutsideLoopPreds[i] && "Loop has multiple entry points!"); } - assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); - assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); assert(BB != &getHeader()->getParent()->front() && "Loop contains function entry block!"); @@ -334,6 +335,27 @@ (*I)->print(OS, Depth+2); } +template +void LoopBase::printVerbose(raw_ostream &OS, + unsigned Depth) const { + OS.indent(Depth*2) << "Loop at depth " << getLoopDepth() + << " containing: "; + + BlockT *H = getHeader(); + BlockT *L = getLoopLatch(); + for (unsigned i = 0; i < getBlocks().size(); ++i) { + BlockT *BB = getBlocks()[i]; + if (BB == H) OS << "
\n"; + if (BB == L) OS << "\n"; + if (isLoopExiting(BB)) OS << "\n"; + BB->print(OS); + } + OS << "\n"; + + for (iterator I = begin(), E = end(); I != E; ++I) + (*I)->print(OS, Depth+2); +} + //===----------------------------------------------------------------------===// /// Stable LoopInfo Analysis - Build a loop tree using stable iterators so the /// result does / not depend on use list (block predecessor) order. Index: llvm/include/llvm/Transforms/Utils/Cloning.h =================================================================== --- llvm/include/llvm/Transforms/Utils/Cloning.h +++ llvm/include/llvm/Transforms/Utils/Cloning.h @@ -231,6 +231,22 @@ void remapInstructionsInBlocks(const SmallVectorImpl &Blocks, ValueToValueMapTy &VMap); +/// \brief Returns true of the region formed by [Entry, Exit] is +/// a single-entry-single-exit (SESE) region. All the traces +/// from \p Entry to \p Exit should be dominated by \p Entry +/// and post-dominated by \p Exit. +bool isSESE(const BasicBlock *Entry, const BasicBlock *Exit, DominatorTree *DT, + DominatorTree *PDT); + +/// \brief Returns true of the region formed by [Entry, Exit] is +/// a single-entry-multiple-exit (SEME) region. All the traces +/// from \p Entry which leads to the \p Exit are analyzed. +bool isSEME(const BasicBlock *Entry, const BasicBlock *Exit, DominatorTree *DT); + +BasicBlock* copySEME(const SmallVectorImpl &Blocks, + const SmallPtrSetImpl &Exits, + ValueToValueMapTy &VMap, const Twine &NameSuffix, + DominatorTree *DT, LoopInfo *LI); } // End llvm namespace #endif Index: llvm/lib/Analysis/LoopInfo.cpp =================================================================== --- llvm/lib/Analysis/LoopInfo.cpp +++ llvm/lib/Analysis/LoopInfo.cpp @@ -387,6 +387,10 @@ LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); } + +LLVM_DUMP_METHOD void Loop::dumpVerbose() const { + printVerbose(dbgs()); +} #endif //===----------------------------------------------------------------------===// Index: llvm/lib/Transforms/Scalar/LoopRotation.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -7,7 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file implements Loop Rotation Pass. +// This file implements Loop Rotation Pass: +// 1. Canonicalize loop latch to have only one successor. +// 2. Clone all the BBs which are exiting the loop. +// 3. Adjust phi of cloned BBs +// 4. Add phi to the new loop header +// 5. Update DOM // //===----------------------------------------------------------------------===// @@ -30,11 +35,13 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" @@ -43,13 +50,51 @@ #define DEBUG_TYPE "loop-rotate" -static cl::opt DefaultRotationThreshold( +static cl::opt RotationMaxHeaderSize( "rotation-max-header-size", cl::init(16), cl::Hidden, cl::desc("The default maximum header size for automatic loop rotation")); +static cl::opt RotationMaxSize( + "rotation-max-size", cl::init(100), cl::Hidden, + cl::desc("The default maximum loop size for automatic loop rotation")); + +static cl::opt MaxLoopsRotated( + "max-loops-rotated", cl::init(-1), cl::Hidden, + cl::desc("The maximum loops to be rotated (-1 means no limit)")); + +static int LoopsRotated = 0; + STATISTIC(NumRotated, "Number of loops rotated"); -namespace { +static void insertBetween(BasicBlock *NewBB, BasicBlock *PredBefore, + BasicBlock *Succ) { + BranchInst *NewBI = BranchInst::Create(Succ, NewBB); + NewBI->setDebugLoc(PredBefore->getTerminator()->getDebugLoc()); + + BranchInst *BLI = dyn_cast(PredBefore->getTerminator()); + for (unsigned I = 0, E = BLI->getNumSuccessors(); I < E; ++I) + if (BLI->getSuccessor(I) == Succ) { + BLI->setSuccessor(I, NewBB); + break; + } + // Move NewBB physically from the end of the block list. + Function *F = Succ->getParent(); + F->getBasicBlockList().splice(Succ->getIterator(), F->getBasicBlockList(), + NewBB); +} + +// Remove the arguments of all phi nodes in PhiBB coming from block From. + +static void discardIncomingValues(BasicBlock *PhiBB, BasicBlock *From) { + for (BasicBlock::iterator I = PhiBB->begin(), + E = PhiBB->end(); I != E; ++I) { + PHINode *PN = dyn_cast(I); + if (!PN) + break; + PN->removeIncomingValue(PN->getBasicBlockIndex(From)); + } +} + /// A simple loop rotation transformation. class LoopRotate { const unsigned MaxHeaderSize; @@ -57,564 +102,347 @@ const TargetTransformInfo *TTI; AssumptionCache *AC; DominatorTree *DT; - ScalarEvolution *SE; + Function *F; + Loop *L; public: LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, - DominatorTree *DT, ScalarEvolution *SE) - : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE) { - } - bool processLoop(Loop *L); + DominatorTree *DT, Function *F, Loop *L) + : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), + F(F), L(L) {} + bool processLoop(); private: - bool rotateLoop(Loop *L, bool SimplifiedLatch); - bool simplifyLoopLatch(Loop *L); -}; -} // end anonymous namespace - -/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the -/// old header into the preheader. If there were uses of the values produced by -/// these instruction that were outside of the loop, we have to insert PHI nodes -/// to merge the two values. Do this now. -static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, - BasicBlock *OrigPreheader, - ValueToValueMapTy &ValueMap) { - // Remove PHI node entries that are no longer live. - BasicBlock::iterator I, E = OrigHeader->end(); - for (I = OrigHeader->begin(); PHINode *PN = dyn_cast(I); ++I) - PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); - - // Now fix up users of the instructions in OrigHeader, inserting PHI nodes - // as necessary. - SSAUpdater SSA; - for (I = OrigHeader->begin(); I != E; ++I) { - Value *OrigHeaderVal = &*I; - - // If there are no uses of the value (e.g. because it returns void), there - // is nothing to rewrite. - if (OrigHeaderVal->use_empty()) - continue; + void rotateLoop(); - Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); - - // The value now exits in two versions: the initial value in the preheader - // and the loop "next" value in the original header. - SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); - SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); - SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); - - // Visit each use of the OrigHeader instruction. - for (Value::use_iterator UI = OrigHeaderVal->use_begin(), - UE = OrigHeaderVal->use_end(); - UI != UE;) { - // Grab the use before incrementing the iterator. - Use &U = *UI; - - // Increment the iterator before removing the use from the list. - ++UI; - - // SSAUpdater can't handle a non-PHI use in the same block as an - // earlier def. We can easily handle those cases manually. - Instruction *UserInst = cast(U.getUser()); - if (!isa(UserInst)) { - BasicBlock *UserBB = UserInst->getParent(); - - // The original users in the OrigHeader are already using the - // original definitions. - if (UserBB == OrigHeader) - continue; + void addPhis(SmallVectorImpl &Blocks, ValueToValueMapTy &VMap, + const Twine &NameSuffix, BasicBlock *NewHeader, + BasicBlock *NewPreheader, BasicBlock *NewLatch); - // Users in the OrigPreHeader need to use the value to which the - // original definitions are mapped. - if (UserBB == OrigPreheader) { - U = OrigPreHeaderVal; - continue; - } - } + bool isProfitableToRotate(); - // Anything else can be handled by SSAUpdater. - SSA.RewriteUse(U); - } + bool isLegalToRotate(); - // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug - // intrinsics. - LLVMContext &C = OrigHeader->getContext(); - if (auto *VAM = ValueAsMetadata::getIfExists(OrigHeaderVal)) { - if (auto *MAV = MetadataAsValue::getIfExists(C, VAM)) { - for (auto UI = MAV->use_begin(), E = MAV->use_end(); UI != E;) { - // Grab the use before incrementing the iterator. Otherwise, altering - // the Use will invalidate the iterator. - Use &U = *UI++; - DbgInfoIntrinsic *UserInst = dyn_cast(U.getUser()); - if (!UserInst) - continue; + void adjustNewHeaderPhis(ValueToValueMapTy &VMap, BasicBlock *NewH, + BasicBlock *NewPH); - // The original users in the OrigHeader are already using the original - // definitions. - BasicBlock *UserBB = UserInst->getParent(); - if (UserBB == OrigHeader) - continue; + BasicBlock *collectSEMEBlocks(BasicBlock *OrigH, BasicBlock *OrigLatch, + SmallVectorImpl &Blocks, + SmallPtrSetImpl &Exits); - // Users in the OrigPreHeader need to use the value to which the - // original definitions are mapped and anything else can be handled by - // the SSAUpdater. To avoid adding PHINodes, check if the value is - // available in UserBB, if not substitute undef. - Value *NewVal; - if (UserBB == OrigPreheader) - NewVal = OrigPreHeaderVal; - else if (SSA.HasValueForBlock(UserBB)) - NewVal = SSA.GetValueInMiddleOfBlock(UserBB); - else - NewVal = UndefValue::get(OrigHeaderVal->getType()); - U = MetadataAsValue::get(C, ValueAsMetadata::get(NewVal)); - } - } - } - } -} + void addPhisToNewHeader(SmallVectorImpl &Blocks, + BasicBlock *NewHeader, BasicBlock *NewPreheader, + BasicBlock *NewLatch, ValueToValueMapTy &VMap); + +}; -/// Rotate loop LP. Return true if the loop is rotated. -/// -/// \param SimplifiedLatch is true if the latch was just folded into the final -/// loop exit. In this case we may want to rotate even though the new latch is -/// now an exiting branch. This rotation would have happened had the latch not -/// been simplified. However, if SimplifiedLatch is false, then we avoid -/// rotating loops in which the latch exits to avoid excessive or endless -/// rotation. LoopRotate should be repeatable and converge to a canonical -/// form. This property is satisfied because simplifying the loop latch can only -/// happen once across multiple invocations of the LoopRotate pass. -bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { +bool LoopRotate::isLegalToRotate() { // If the loop has only one block then there is not much to rotate. if (L->getBlocks().size() == 1) return false; - BasicBlock *OrigHeader = L->getHeader(); - BasicBlock *OrigLatch = L->getLoopLatch(); - - BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); - if (!BI || BI->isUnconditional()) - return false; - // If the loop header is not one of the loop exiting blocks then // either this loop is already rotated or it is not // suitable for loop rotation transformations. + const BasicBlock *OrigHeader = L->getHeader(); if (!L->isLoopExiting(OrigHeader)) return false; - // If the loop latch already contains a branch that leaves the loop then the - // loop is already rotated. - if (!OrigLatch) - return false; - - // Rotate if either the loop latch does *not* exit the loop, or if the loop - // latch was just simplified. - if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) + const BranchInst *BI = dyn_cast(OrigHeader->getTerminator()); + if (!BI || BI->isUnconditional()) return false; - // Check size of original header and reject loop if it is very big or we can't - // duplicate blocks inside it. - { - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - CodeMetrics Metrics; - Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); - if (Metrics.notDuplicatable) { - DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" - << " instructions: "; - L->dump()); - return false; - } - if (Metrics.convergent) { - DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " - "instructions: "; - L->dump()); - return false; - } - if (Metrics.NumInsts > MaxHeaderSize) - return false; - } - - // Now, this loop is suitable for rotation. - BasicBlock *OrigPreheader = L->getLoopPreheader(); - // If the loop could not be converted to canonical form, it must have an // indirectbr in it, just give up. - if (!OrigPreheader) + if (!L->getLoopPreheader()) return false; - // Anything ScalarEvolution may know about this loop or the PHI nodes - // in its header will soon be invalidated. - if (SE) - SE->forgetLoop(L); + const BasicBlock *LoopLatch = L->getLoopLatch(); + if (!isSEME(OrigHeader, LoopLatch, DT)) + return false; - DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + return true; +} - // Find new Loop header. NewHeader is a Header's one and only successor - // that is inside loop. Header's other successor is outside the - // loop. Otherwise loop is not suitable for rotation. - BasicBlock *Exit = BI->getSuccessor(0); - BasicBlock *NewHeader = BI->getSuccessor(1); - if (L->contains(Exit)) - std::swap(Exit, NewHeader); - assert(NewHeader && "Unable to determine new loop header"); - assert(L->contains(NewHeader) && !L->contains(Exit) && - "Unable to determine loop header and exit blocks"); - - // This code assumes that the new header has exactly one predecessor. - // Remove any single-entry PHI nodes in it. - assert(NewHeader->getSinglePredecessor() && - "New header doesn't have one pred!"); - FoldSingleEntryPHINodes(NewHeader); - - // Begin by walking OrigHeader and populating ValueMap with an entry for - // each Instruction. - BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); - ValueToValueMapTy ValueMap; - - // For PHI nodes, the value available in OldPreHeader is just the - // incoming value from OldPreHeader. - for (; PHINode *PN = dyn_cast(I); ++I) - ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); - - const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); - - // For the rest of the instructions, either hoist to the OrigPreheader if - // possible or create a clone in the OldPreHeader if not. - TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); - while (I != E) { - Instruction *Inst = &*I++; - - // If the instruction's operands are invariant and it doesn't read or write - // memory, then it is safe to hoist. Doing this doesn't change the order of - // execution in the preheader, but does prevent the instruction from - // executing in each iteration of the loop. This means it is safe to hoist - // something that might trap, but isn't safe to hoist something that reads - // memory (without proving that the loop doesn't write). - if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && - !Inst->mayWriteToMemory() && !isa(Inst) && - !isa(Inst) && !isa(Inst)) { - Inst->moveBefore(LoopEntryBranch); - continue; - } +// FIXME: count the number of instructions in Blocks and discard when reaching +// upper bound. +bool LoopRotate::isProfitableToRotate() { + const BasicBlock *OrigHeader = L->getHeader(); - // Otherwise, create a duplicate of the instruction. - Instruction *C = Inst->clone(); - - // Eagerly remap the operands of the instruction. - RemapInstruction(C, ValueMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - - // With the operands remapped, see if the instruction constant folds or is - // otherwise simplifyable. This commonly occurs because the entry from PHI - // nodes allows icmps and other instructions to fold. - // FIXME: Provide TLI, DT, AC to SimplifyInstruction. - Value *V = SimplifyInstruction(C, DL); - if (V && LI->replacementPreservesLCSSAForm(C, V)) { - // If so, then delete the temporary instruction and stick the folded value - // in the map. - ValueMap[Inst] = V; - if (!C->mayHaveSideEffects()) { - delete C; - C = nullptr; - } - } else { - ValueMap[Inst] = C; - } - if (C) { - // Otherwise, stick the new instruction into the new block! - C->setName(Inst->getName()); - C->insertBefore(LoopEntryBranch); - } + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, AC, EphValues); + + CodeMetrics Metrics; + Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" + << " instructions: "; + L->dump()); + return false; } - // Along with all the other instructions, we just cloned OrigHeader's - // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's - // successors by duplicating their incoming values for OrigHeader. - TerminatorInst *TI = OrigHeader->getTerminator(); - for (BasicBlock *SuccBB : TI->successors()) - for (BasicBlock::iterator BI = SuccBB->begin(); - PHINode *PN = dyn_cast(BI); ++BI) - PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); - - // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove - // OrigPreHeader's old terminator (the original branch into the loop), and - // remove the corresponding incoming values from the PHI nodes in OrigHeader. - LoopEntryBranch->eraseFromParent(); - - // If there were any uses of instructions in the duplicated block outside the - // loop, update them, inserting PHI nodes as required - RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap); - - // NewHeader is now the header of the loop. - L->moveToHeader(NewHeader); - assert(L->getHeader() == NewHeader && "Latch block is our new header"); - - // At this point, we've finished our major CFG changes. As part of cloning - // the loop into the preheader we've simplified instructions and the - // duplicated conditional branch may now be branching on a constant. If it is - // branching on a constant and if that constant means that we enter the loop, - // then we fold away the cond branch to an uncond branch. This simplifies the - // loop in cases important for nested loops, and it also means we don't have - // to split as many edges. - BranchInst *PHBI = cast(OrigPreheader->getTerminator()); - assert(PHBI->isConditional() && "Should be clone of BI condbr!"); - if (!isa(PHBI->getCondition()) || - PHBI->getSuccessor(cast(PHBI->getCondition())->isZero()) != - NewHeader) { - // The conditional branch can't be folded, handle the general case. - // Update DominatorTree to reflect the CFG change we just made. Then split - // edges as necessary to preserve LoopSimplify form. - if (DT) { - // Everything that was dominated by the old loop header is now dominated - // by the original loop preheader. Conceptually the header was merged - // into the preheader, even though we reuse the actual block as a new - // loop latch. - DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); - SmallVector HeaderChildren(OrigHeaderNode->begin(), - OrigHeaderNode->end()); - DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); - for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) - DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); - - assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); - assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); - - // Update OrigHeader to be dominated by the new header block. - DT->changeImmediateDominator(OrigHeader, OrigLatch); - } - - // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and - // thus is not a preheader anymore. - // Split the edge to form a real preheader. - BasicBlock *NewPH = SplitCriticalEdge( - OrigPreheader, NewHeader, - CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); - NewPH->setName(NewHeader->getName() + ".lr.ph"); - - // Preserve canonical loop form, which means that 'Exit' should have only - // one predecessor. Note that Exit could be an exit block for multiple - // nested loops, causing both of the edges to now be critical and need to - // be split. - SmallVector ExitPreds(pred_begin(Exit), pred_end(Exit)); - bool SplitLatchEdge = false; - for (BasicBlock *ExitPred : ExitPreds) { - // We only need to split loop exit edges. - Loop *PredLoop = LI->getLoopFor(ExitPred); - if (!PredLoop || PredLoop->contains(Exit)) - continue; - if (isa(ExitPred->getTerminator())) - continue; - SplitLatchEdge |= L->getLoopLatch() == ExitPred; - BasicBlock *ExitSplit = SplitCriticalEdge( - ExitPred, Exit, - CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); - ExitSplit->moveBefore(Exit); - } - assert(SplitLatchEdge && - "Despite splitting all preds, failed to split latch exit?"); - } else { - // We can fold the conditional branch in the preheader, this makes things - // simpler. The first step is to remove the extra edge to the Exit block. - Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); - NewBI->setDebugLoc(PHBI->getDebugLoc()); - PHBI->eraseFromParent(); - - // With our CFG finalized, update DomTree if it is available. - if (DT) { - // Update OrigHeader to be dominated by the new header block. - DT->changeImmediateDominator(NewHeader, OrigPreheader); - DT->changeImmediateDominator(OrigHeader, OrigLatch); - - // Brute force incremental dominator tree update. Call - // findNearestCommonDominator on all CFG predecessors of each child of the - // original header. - DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); - SmallVector HeaderChildren(OrigHeaderNode->begin(), - OrigHeaderNode->end()); - bool Changed; - do { - Changed = false; - for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { - DomTreeNode *Node = HeaderChildren[I]; - BasicBlock *BB = Node->getBlock(); - - pred_iterator PI = pred_begin(BB); - BasicBlock *NearestDom = *PI; - for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) - NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); - - // Remember if this changes the DomTree. - if (Node->getIDom()->getBlock() != NearestDom) { - DT->changeImmediateDominator(BB, NearestDom); - Changed = true; - } - } - - // If the dominator changed, this may have an effect on other - // predecessors, continue until we reach a fixpoint. - } while (Changed); - } + if (Metrics.convergent) { + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " + "instructions: "; + L->dump()); + return false; } - assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); - assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); - - // Now that the CFG and DomTree are in a consistent state again, try to merge - // the OrigHeader block into OrigLatch. This will succeed if they are - // connected by an unconditional branch. This is just a cleanup so the - // emitted code isn't too gross in this common case. - MergeBlockIntoPredecessor(OrigHeader, DT, LI); + if (Metrics.NumInsts > MaxHeaderSize) + return false; - DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + unsigned LoopSize = MaxHeaderSize; + for(const BasicBlock* BB : L->getBlocks()) { + if (BB->size() + LoopSize < RotationMaxSize) + LoopSize += BB->size(); + else + return false; + } - ++NumRotated; return true; } -/// Determine whether the instructions in this range may be safely and cheaply -/// speculated. This is not an important enough situation to develop complex -/// heuristics. We handle a single arithmetic instruction along with any type -/// conversions. -static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, - BasicBlock::iterator End, Loop *L) { - bool seenIncrement = false; - bool MultiExitLoop = false; - - if (!L->getExitingBlock()) - MultiExitLoop = true; +// Add phis to the new header and adjust the phi nodes from the OrigHeader. +void LoopRotate::addPhisToNewHeader(SmallVectorImpl &Blocks, + BasicBlock *NewHeader, BasicBlock *NewPreheader, + BasicBlock *NewLatch, ValueToValueMapTy &VMap) { + // Add to NewHeader phi nodes for all copied variables which are used. + for (BasicBlock *BB : Blocks) { + for (Instruction &Inst : *BB) { + // Skip Ins with no use e.g., branches. + if (Inst.use_begin() == Inst.use_end()) + continue; - for (BasicBlock::iterator I = Begin; I != End; ++I) { + for (auto UI = Inst.use_begin(), E = Inst.use_end(); UI != E;) { + Use &U = *UI++; + Instruction *UserInst = cast(U.getUser()); - if (!isSafeToSpeculativelyExecute(&*I)) - return false; + // Nothing to rename when the use is dominated by the definition. + if (DT->dominates(&Inst, UserInst)) + continue; - if (isa(I)) - continue; + if (!L->contains(UserInst->getParent())) { + // Handle uses in the loop-closed-phi. + PHINode *ClosePhi = cast(UserInst); + BasicBlock *Pred = ClosePhi->getIncomingBlock(U.getOperandNo()); - switch (I->getOpcode()) { - default: - return false; - case Instruction::GetElementPtr: - // GEPs are cheap if all indices are constant. - if (!cast(I)->hasAllConstantIndices()) - return false; - // fall-thru to increment case - case Instruction::Add: - case Instruction::Sub: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: { - Value *IVOpnd = - !isa(I->getOperand(0)) - ? I->getOperand(0) - : !isa(I->getOperand(1)) ? I->getOperand(1) : nullptr; - if (!IVOpnd) - return false; - - // If increment operand is used outside of the loop, this speculation - // could cause extra live range interference. - if (MultiExitLoop) { - for (User *UseI : IVOpnd->users()) { - auto *UserInst = cast(UseI); - if (!L->contains(UserInst)) - return false; + // Do not rename a loop close phi node if its predecessor in the loop + // is dominated by Inst. + if (L->contains(Pred) && DT->dominates(BB, Pred)) + continue; } + + PHINode *PN = PHINode::Create(Inst.getType(), 2, "phi.nh", + &*NewHeader->begin()); + PN->addIncoming(&Inst, NewLatch); + PN->addIncoming(cast(VMap[&Inst]), NewPreheader); + + // When Inst does not dominate U, it is going to use the updated + // definition coming from PN. + U.set(PN); } + } + } +} - if (seenIncrement) - return false; - seenIncrement = true; +// Add incoming values to the (already present) PHIs of NewH. +void LoopRotate::adjustNewHeaderPhis(ValueToValueMapTy &VMap, BasicBlock *NewH, + BasicBlock *NewPH) { + for (Instruction &Inst : *NewH) { + PHINode *PN = dyn_cast(&Inst); + if (!PN) break; + assert((PN->getNumOperands() == 1) && "NewH had multiple predecessors."); + Value *Op = PN->getIncomingValue(0); + if (Value *RenamedVal = VMap[Op]) + PN->addIncoming(RenamedVal, NewPH); + else // When no mapping is available (e.g., in case of a constant). + PN->addIncoming(Op, NewPH); + } +} + +BasicBlock * +LoopRotate::collectSEMEBlocks(BasicBlock *OrigH, BasicBlock *OrigLatch, + SmallVectorImpl &Blocks, + SmallPtrSetImpl &Exits) { + BasicBlock *NewH = nullptr; + for (auto I = df_begin(OrigH), E = df_end(OrigH); I != E;) { + if (!L->contains(*I)) { + I.skipChildren(); + continue; } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - // ignore type conversions - break; + + // Copy until any BB where the branch does not exit loop, or the loop-latch. + if (OrigLatch == *I || !L->isLoopExiting(*I) + || !isa((*I)->getTerminator())) { + // This will become the new header. + NewH = *I; + I.skipChildren(); + } else { + Blocks.push_back(*I); + + BranchInst *BI = cast((*I)->getTerminator()); + for (unsigned B = 0, E = BI->getNumSuccessors(); B < E; ++B) { + BasicBlock *Succ = BI->getSuccessor(B); + if (!L->contains(Succ)) + Exits.insert(Succ); + } + ++I; } } - return true; + return NewH; } -/// Fold the loop tail into the loop exit by speculating the loop tail -/// instructions. Typically, this is a single post-increment. In the case of a -/// simple 2-block loop, hoisting the increment can be much better than -/// duplicating the entire loop header. In the case of loops with early exits, -/// rotation will not work anyway, but simplifyLoopLatch will put the loop in -/// canonical form so downstream passes can handle it. -/// -/// I don't believe this invalidates SCEV. -bool LoopRotate::simplifyLoopLatch(Loop *L) { - BasicBlock *Latch = L->getLoopLatch(); - if (!Latch || Latch->hasAddressTaken()) - return false; +/// Rotate loop L. +/// TODO: arrange newly inserted bbs. +void LoopRotate::rotateLoop() { + BasicBlock *OrigH = L->getHeader(); + BasicBlock *OrigLatch = L->getLoopLatch(); + BasicBlock *OrigPH = L->getLoopPreheader(); - BranchInst *Jmp = dyn_cast(Latch->getTerminator()); - if (!Jmp || !Jmp->isUnconditional()) - return false; + DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); - BasicBlock *LastExit = Latch->getSinglePredecessor(); - if (!LastExit || !L->isLoopExiting(LastExit)) - return false; + // Basic blocks to be copied. + SmallVector Blocks; + SmallPtrSet Exits; + // Collect all nodes of the loop from header to latch. + BasicBlock *NewH = collectSEMEBlocks(OrigH, OrigLatch, Blocks, Exits); + assert(NewH); + + ValueToValueMapTy VMap; + copySEME(Blocks, Exits, VMap, ".lr", DT, LI); + + // Redirect original preheader to the entry of SEME. + BranchInst *PBI = dyn_cast(OrigPH->getTerminator()); + assert(PBI && (1 == PBI->getNumSuccessors())); + + BasicBlock *CopyOrigH = cast(VMap[OrigH]); + PBI->setSuccessor(0, CopyOrigH); + DT->changeImmediateDominator(CopyOrigH, OrigPH); + L->moveToHeader(NewH); + + // Remove this code. + BasicBlock *BeforeLoop = nullptr; + for (BasicBlock *BB: predecessors(NewH)) + if (!L->contains(BB)) { + BeforeLoop = BB; + break; + } + assert(BeforeLoop); + + BasicBlock *NewPH = BasicBlock::Create(NewH->getContext(), + NewH->getName() + ".lr.ph", + NewH->getParent(), BeforeLoop); + Loop *OuterLoop = LI->getLoopFor(OrigPH); + if (OuterLoop) + OuterLoop->addBasicBlockToLoop(NewPH, *LI); + + // Move NewH physically to the beginning of the loop. + F->getBasicBlockList().splice(OrigH->getIterator(), F->getBasicBlockList(), + NewH); + // BeforeLoop --> NewPH --> NewH. + insertBetween(NewPH, BeforeLoop, NewH); + + DT->addNewBlock(NewPH, BeforeLoop); + DT->changeImmediateDominator(NewPH, BeforeLoop); + DT->changeImmediateDominator(NewH, NewPH); + + // Also, the original entry lost its immediate dominator so its dominator + // should be adjusted. We use SEME property => idom (OrigH) = its single pred. + DT->changeImmediateDominator(OrigH, OrigH->getSinglePredecessor()); + + for (BasicBlock *BB : Blocks) { + typedef DomTreeNodeBase DTNode; + DTNode *IDom = DT->getNode(BB); + std::vector::iterator I = IDom->begin(); + for (; I != IDom->end();) { + BasicBlock *ExitBB = (*I)->getBlock(); + if (L->contains(ExitBB)) { + ++I; + continue; + } - BranchInst *BI = dyn_cast(LastExit->getTerminator()); - if (!BI) - return false; + BasicBlock *StaleIDom = DT->getNode(ExitBB)->getIDom()->getBlock(); + //assert (BB == StaleIDom); + BasicBlock *NewBB = cast(VMap[BB]); + // VERIFY: NewIDom will be correct because this part of CFG is up-to-date. + BasicBlock *NewIDom = DT->findNearestCommonDominator(StaleIDom, NewBB); + NewIDom = DT->findNearestCommonDominator(NewIDom, BB); + if (NewIDom != StaleIDom) { + DT->changeImmediateDominator(ExitBB, NewIDom); + DEBUG(dbgs() << "\nChanging IDom of " << *ExitBB << "to" << *NewIDom); + I = IDom->begin(); + } else + ++I; + } + } - if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) - return false; + adjustNewHeaderPhis(VMap, NewH, NewPH); - DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " - << LastExit->getName() << "\n"); + BasicBlock *NewLatch = L->getLoopLatch(); + assert(L->getLoopPreheader() && "Invalid loop preheader after rotation"); + assert(NewLatch && "Invalid loop latch after rotation"); - // Hoist the instructions from Latch into LastExit. - LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), - Latch->begin(), Jmp->getIterator()); + addPhisToNewHeader(Blocks, NewH, NewPH, NewLatch, VMap); - unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; - BasicBlock *Header = Jmp->getSuccessor(0); - assert(Header == L->getHeader() && "expected a backward branch"); + // Discard incoming values in the CopyOrigHeader, which are coming from + // OrigLatch since it has only one predecessor. + discardIncomingValues(CopyOrigH, OrigLatch); + discardIncomingValues(OrigH, OrigPH); - // Remove Latch from the CFG so that LastExit becomes the new Latch. - BI->setSuccessor(FallThruPath, Header); - Latch->replaceSuccessorsPhiUsesWith(LastExit); - Jmp->eraseFromParent(); + assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); + LI->verify(); + verifyFunction(*F); + DEBUG(dbgs() << "LoopRotation: into "; L->dumpVerbose()); - // Nuke the Latch block. - assert(Latch->empty() && "unable to evacuate Latch"); - LI->removeBlock(Latch); - if (DT) - DT->eraseNode(Latch); - Latch->eraseFromParent(); - return true; + assert (isSEME(L->getHeader(), NewLatch, DT)); + assert (isSEME(CopyOrigH, NewPH, DT)); + + ++NumRotated; } /// Rotate \c L, and return true if any modification was made. -bool LoopRotate::processLoop(Loop *L) { +bool LoopRotate::processLoop() { // Save the loop metadata. MDNode *LoopMD = L->getLoopID(); - // Simplify the loop latch before attempting to rotate the header - // upward. Rotation may not be needed if the loop tail can be folded into the - // loop exit. - bool SimplifiedLatch = simplifyLoopLatch(L); + BasicBlock *LoopLatch = L->getLoopLatch(); + if (!LoopLatch) + return false; + + if (!isLegalToRotate()) + return false; + + if (!isProfitableToRotate()) + return false; - bool MadeChange = rotateLoop(L, SimplifiedLatch); - assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && + if (MaxLoopsRotated != -1) { + if (LoopsRotated >= MaxLoopsRotated) + return false; + ++LoopsRotated; + } + + // Make sure the latch has only one successor. + if (!LoopLatch->getSingleSuccessor()) { + DEBUG(dbgs() << "\nSplitting the edge of Loop:"; L->dumpVerbose();); + LoopLatch = SplitEdge(LoopLatch, L->getHeader(), DT, LI); + } + + assert(LoopLatch->getSingleSuccessor()); + + rotateLoop(); + assert(L->isLoopExiting(L->getLoopLatch()) && "Loop latch should be exiting after loop-rotate."); // Restore the loop metadata. // NB! We presume LoopRotation DOESN'T ADD its own metadata. - if ((MadeChange || SimplifiedLatch) && LoopMD) + if (LoopMD) L->setLoopID(LoopMD); - return MadeChange; + return true; } LoopRotatePass::LoopRotatePass() {} @@ -630,10 +458,9 @@ // Optional analyses. auto *DT = FAM.getCachedResult(*F); - auto *SE = FAM.getCachedResult(*F); - LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE); + LoopRotate LR(RotationMaxHeaderSize, LI, TTI, AC, DT, F, &L); - bool Changed = LR.processLoop(&L); + bool Changed = LR.processLoop(); if (!Changed) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); @@ -649,7 +476,7 @@ LoopRotateLegacyPass(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { initializeLoopRotateLegacyPassPass(*PassRegistry::getPassRegistry()); if (SpecifiedMaxHeaderSize == -1) - MaxHeaderSize = DefaultRotationThreshold; + MaxHeaderSize = RotationMaxHeaderSize; else MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); } @@ -658,6 +485,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); getLoopAnalysisUsage(AU); } @@ -671,10 +499,8 @@ auto *AC = &getAnalysis().getAssumptionCache(F); auto *DTWP = getAnalysisIfAvailable(); auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; - auto *SEWP = getAnalysisIfAvailable(); - auto *SE = SEWP ? &SEWP->getSE() : nullptr; - LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE); - return LR.processLoop(L); + LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, &F, L); + return LR.processLoop(); } }; } @@ -684,6 +510,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopRotateLegacyPass, "loop-rotate", "Rotate Loops", false, false) Index: llvm/lib/Transforms/Utils/CloneFunction.cpp =================================================================== --- llvm/lib/Transforms/Utils/CloneFunction.cpp +++ llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -710,3 +710,133 @@ return NewLoop; } + +bool llvm::isSESE(const BasicBlock *Entry, const BasicBlock *Exit, + DominatorTree *DT, DominatorTree *PDT) { + if (!DT->dominates(Entry, Exit)) + return false; + + if (!PDT->dominates(Exit, Entry)) + return false; + + for (auto I = df_begin(Entry), E = df_end(Entry); I != E;) { + if (*I == Exit) { + I.skipChildren(); + continue; + } + if (!DT->dominates(Entry, *I)) + return false; + ++I; + } + return true; +} + +bool llvm::isSEME(const BasicBlock *Entry, const BasicBlock *Exit, + DominatorTree *DT) { + if (!DT->dominates(Entry, Exit)) + return false; + + for (auto I = idf_begin(Exit), E = idf_end(Exit); I != E;) { + if (*I == Entry) { + I.skipChildren(); + continue; + } + + if (!DT->dominates(Entry, *I)) + return false; + + ++I; + } + return true; +} + +static void adjustExitingPhis(ValueToValueMapTy &VMap, + const SmallPtrSetImpl &Exits) { + for (BasicBlock *BB : Exits) { + for (Instruction &Inst : *BB) { + PHINode *PN = dyn_cast(&Inst); + if (!PN) + break; + bool EdgeFromOrigBB = false; + for (unsigned i = 0, e = PN->getNumOperands(); i != e; ++i) { + Value *CopyB = VMap[PN->getIncomingBlock(i)]; + if (!CopyB) // Skip args coming from outside the SEME. + continue; + BasicBlock *CopyBB = cast(CopyB); + EdgeFromOrigBB = true; + Value *Op = PN->getIncomingValue(i); + if (Value *RenamedVal = VMap[Op]) + PN->addIncoming(RenamedVal, CopyBB); + else + // When no mapping is available it must be a constant. + PN->addIncoming(Op, CopyBB); + } + assert(EdgeFromOrigBB && "Illegal exit from SEME."); + } + } +} + +BasicBlock* llvm::copySEME(const SmallVectorImpl &Blocks, + const SmallPtrSetImpl &Exits, + ValueToValueMapTy &VMap, + const Twine &NameSuffix, + DominatorTree *DT, LoopInfo *LI) { + BasicBlock *DomEntry = DT->getNode(Blocks[0])->getIDom()->getBlock(); + assert(DomEntry && "no dominator"); + + Function *F = DomEntry->getParent(); + BasicBlock *OrigH = Blocks[0]; + SmallVector NewBlocks; + for (BasicBlock *BB : Blocks) { + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, NameSuffix, F); + // Move them physically from the end of the block list. + F->getBasicBlockList().splice(OrigH->getIterator(), F->getBasicBlockList(), + NewBB); + Loop *BBLoop = LI->getLoopFor(BB); + Loop *BBParentLoop = BBLoop->getParentLoop(); + if (BBParentLoop) + BBParentLoop->addBasicBlockToLoop(NewBB, *LI); + VMap[BB] = NewBB; + NewBlocks.push_back(NewBB); + } + + remapInstructionsInBlocks(NewBlocks, VMap); + + for (BasicBlock *BB : Blocks) { + BasicBlock *NewBB = cast_or_null(VMap[BB]); + BranchInst *BI = dyn_cast(NewBB->getTerminator()); + if (!BI) + continue; + + for (unsigned I = 0, E = BI->getNumSuccessors(); I < E; ++I) { + BasicBlock *NewSucc = cast_or_null(VMap[BI->getSuccessor(I)]); + if (!NewSucc) + continue; + BI->setSuccessor(I, NewSucc); + } + } + + // For all the basic blocks in the SEME, update the DOM. Except for the entry + // block the tree structure is the same so the dominators also follow the same + // structural property. If the imm-dom of orig BB is not in SEME that means it + // is the entry block, in that case the new idom of the new BB must be its + // single predecessor because we are dealing with an SEME region. + BasicBlock *EntryNewSEME = nullptr; + for (BasicBlock *BB : Blocks) { + BasicBlock *NewBB = cast_or_null(VMap[BB]); + assert(NewBB); + + BasicBlock *Dom = DT->getNode(BB)->getIDom()->getBlock(); + BasicBlock *NewDom = cast_or_null(VMap[Dom]); + if (!NewDom) { // Dom does not belong to SEME => entry block. + EntryNewSEME = NewBB; + NewDom = Dom; + assert (Dom == DomEntry); + } + DT->addNewBlock(NewBB, NewDom); + DT->changeImmediateDominator(NewBB, NewDom); + } + + adjustExitingPhis(VMap, Exits); + return EntryNewSEME; +} Index: llvm/test/Analysis/GlobalsModRef/memset-escape.ll =================================================================== --- llvm/test/Analysis/GlobalsModRef/memset-escape.ll +++ llvm/test/Analysis/GlobalsModRef/memset-escape.ll @@ -6,15 +6,19 @@ @a = internal global [3 x i32] zeroinitializer, align 4 @b = common global i32 0, align 4 -; The important thing we're checking for here is the reload of (some element of) -; @a after the memset. +; Check that load and the call to abort is redundant. +; CHECK: store i32 1, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 +; CHECK: store i32 0, i32* @b, align 4 +; CHECK: br label %for.body -; CHECK-LABEL: @main -; CHECK: call void @llvm.memset.p0i8.i64{{.*}} @a -; CHECK: store i32 3 -; CHECK: load i32, i32* getelementptr {{.*}} @a -; CHECK: icmp eq i32 -; CHECK: br i1 +; CHECK: for.body: ; preds = %for.body.preheader +; CHECK: store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 0), align 4 +; CHECK: store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 1), align 4 +; CHECK: store i32 0, i32* getelementptr inbounds ([3 x i32], [3 x i32]* @a, i64 0, i64 2), align 4 +; CHECK: store i32 3, i32* @b, align 4 +; CHECK: br i1 true, label %if.end, label %if.then +; CHECK-NOT: load +; CHECK-NOT: call void @abort() define i32 @main() { entry: Index: llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll +++ llvm/test/Analysis/ScalarEvolution/2012-03-26-LoadConstant.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -instcombine -indvars -loop-deletion -constmerge -S | FileCheck %s +; RUN: opt < %s -basicaa -globalopt -instcombine -loop-rotate -licm -simplifycfg -S | FileCheck %s + ; PR11882: ComputeLoadConstantCompareExitLimit crash. ; ; for.body is deleted leaving a loop-invariant load. Index: llvm/test/Transforms/LoopRotate/alloca.ll =================================================================== --- llvm/test/Transforms/LoopRotate/alloca.ll +++ llvm/test/Transforms/LoopRotate/alloca.ll @@ -5,8 +5,9 @@ ; We expect a different value for %ptr each iteration (according to the ; definition of alloca). I.e. each @use must be paired with an alloca. -; CHECK: call void @use(i8* % -; CHECK: %ptr = alloca i8 +; CHECK: alloca i8 +; CHECK: call void @use +; CHECK: alloca i8 @e = global i16 10 Index: llvm/test/Transforms/LoopRotate/basic.ll =================================================================== --- llvm/test/Transforms/LoopRotate/basic.ll +++ llvm/test/Transforms/LoopRotate/basic.ll @@ -18,9 +18,10 @@ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0 br i1 %cmp, label %for.body, label %for.end -; CHECK: for.body: +; CHECK: for.cond.lr: ; CHECK-NEXT: phi i32 [ 0 -; CHECK-NEXT: store i32 0 +; CHECK: for.body: +; CHECK: store i32 0 for.body: ; preds = %for.cond store i32 0, i32* %arrayidx, align 16 Index: llvm/test/Transforms/LoopRotate/dbgvalue.ll =================================================================== --- llvm/test/Transforms/LoopRotate/dbgvalue.ll +++ llvm/test/Transforms/LoopRotate/dbgvalue.ll @@ -5,9 +5,9 @@ define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 { ; CHECK-LABEL: define i32 @tak( -; CHECK: entry -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x -; CHECK: tail call void @llvm.dbg.value(metadata i32 %call +; CHECK: tailrecurse.lr +; CHECK: call void @llvm.dbg.value(metadata i32 %x +; CHECK: tail call void @llvm.dbg.value(metadata i32 %y entry: br label %tailrecurse @@ -42,7 +42,7 @@ ; CHECK-LABEL: define i32 @tak2( ; CHECK: entry ; CHECK: tail call void @llvm.dbg.value(metadata i32 %x.tr -; CHECK: tail call void @llvm.dbg.value(metadata i32 undef +; CHECK: tail call void @llvm.dbg.value(metadata i32 %y.tr entry: br label %tailrecurse @@ -83,12 +83,13 @@ ; Ensure that the loop increment basic block is rotated into the tail of the ; body, even though it contains a debug intrinsic call. ; CHECK-LABEL: define void @FindFreeHorzSeg( +; CHECK: for.inc: +; CHECK: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ] ; CHECK: %dec = add ; CHECK-NEXT: tail call void @llvm.dbg.value +; CHECK-NEXT: br label %for.cond ; CHECK: %cmp = icmp ; CHECK: br i1 %cmp -; CHECK: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ] -; CHECK-NEXT: br label %for.end entry: Index: llvm/test/Transforms/LoopRotate/loop-rotate-0.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/loop-rotate-0.ll @@ -0,0 +1,30 @@ +; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @a2i_ASN1_ENUMERATED() { +entry: + br label %for.cond + +for.cond: ; preds = %if.end94, %entry + %first.0 = phi i32 [ 1, %entry ], [ 0, %if.end94 ] + br i1 undef, label %err_sl, label %if.end + +if.end: ; preds = %for.cond + br i1 undef, label %err_sl, label %if.end75 + +if.end75: ; preds = %if.end + br i1 undef, label %if.end94, label %if.then93 + +if.then93: ; preds = %if.end75 + ret i32 0 + +if.end94: ; preds = %if.end75 + %call179 = tail call i32 @gets() + br label %for.cond + +err_sl: ; preds = %if.end, %for.cond + unreachable +} + +declare i32 @gets() Index: llvm/test/Transforms/LoopRotate/loop-rotate-1.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/loop-rotate-1.ll @@ -0,0 +1,30 @@ +; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%fp = type { i64 } + +declare void @foo(%fp* %this, %fp* %that) + +define void @bar(%fp* %__begin1, %fp* %__end1, %fp** %__end2) { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %__end1.addr.0 = phi %fp* [ %__end1, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq %fp* %__end1.addr.0, %__begin1 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %0 = load %fp*, %fp** %__end2, align 8 + %add.ptr = getelementptr inbounds %fp, %fp* %0, i64 -1 + %incdec.ptr = getelementptr inbounds %fp, %fp* %__end1.addr.0, i64 -1 + tail call void @foo(%fp* %add.ptr, %fp* %incdec.ptr) + %1 = load %fp*, %fp** %__end2, align 8 + %incdec.ptr2 = getelementptr inbounds %fp, %fp* %1, i64 -1 + store %fp* %incdec.ptr2, %fp** %__end2, align 8 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} Index: llvm/test/Transforms/LoopRotate/loop-rotate-10.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/loop-rotate-10.ll @@ -0,0 +1,35 @@ +; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare i32 @foo() +define i32 @bar() { +entry: + br label %for.cond + +for.cond: ; preds = %if.then178, %entry + %first.0 = phi i32 [ 1, %entry ], [ 0, %if.then178 ] + br i1 undef, label %err_sl, label %if.end + +if.end: ; preds = %for.cond + br i1 undef, label %err_sl, label %if.end75 + +if.end75: ; preds = %if.end + br i1 undef, label %if.end94, label %if.then93 + +if.then93: ; preds = %if.end75 + unreachable + +if.end94: ; preds = %if.end75 + br i1 undef, label %if.then178, label %for.end182 + +if.then178: ; preds = %if.end94 + %call179 = tail call i32 @foo() + br label %for.cond + +for.end182: ; preds = %if.end94 + ret i32 1 + +err_sl: ; preds = %if.end, %for.cond + unreachable +} Index: llvm/test/Transforms/LoopRotate/loop-rotate-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopRotate/loop-rotate-2.ll @@ -0,0 +1,36 @@ +; RUN: opt -S < %s -loop-rotate -verify-dom-info -verify-loop-info +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%0 = type { i64, i64, i8* } +%1 = type { %2 } +%2 = type { %3 } +%3 = type { %4 } +%4 = type { %5 } +%5 = type { %6 } +%6 = type { i64, i64, i8* } +%7 = type { i8 } + +declare void @foo(%0*, %0*) + +define linkonce_odr void @bar(%7*, %0*, %0*, %0**) { + br label %5 + +;