Index: include/llvm/Analysis/PostDominators.h =================================================================== --- include/llvm/Analysis/PostDominators.h +++ include/llvm/Analysis/PostDominators.h @@ -29,6 +29,9 @@ struct PostDominatorTree : public PostDomTreeBase { using Base = PostDomTreeBase; + PostDominatorTree() = default; + explicit PostDominatorTree(Function &F) { recalculate(F); } + /// Handle invalidation explicitly. bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &); Index: include/llvm/CodeGen/MachineDominators.h =================================================================== --- include/llvm/CodeGen/MachineDominators.h +++ include/llvm/CodeGen/MachineDominators.h @@ -208,13 +208,6 @@ DT->eraseNode(BB); } - /// splitBlock - BB is split and now it has one successor. Update dominator - /// tree to reflect this change. - inline void splitBlock(MachineBasicBlock* NewBB) { - applySplitCriticalEdges(); - DT->splitBlock(NewBB); - } - /// isReachableFromEntry - Return true if A is dominated by the entry /// block of the function containing it. bool isReachableFromEntry(const MachineBasicBlock *A) { Index: include/llvm/IR/Dominators.h =================================================================== --- include/llvm/IR/Dominators.h +++ include/llvm/IR/Dominators.h @@ -284,7 +284,8 @@ }; //===------------------------------------- -/// \brief Class to defer updates to a DominatorTree. +/// \brief Class to defer updates to a DominatorTree and (optionally) +/// PostDominatorTree. /// /// Definition: Applying updates to every edge insertion and deletion is /// expensive and not necessary. When one needs the DominatorTree for analysis @@ -292,7 +293,8 @@ /// advantage of the DominatorTree inspecting the set of updates to find /// duplicates or unnecessary subtree updates. /// -/// The scope of DeferredDominance operates at a Function level. +/// The scope of DeferredDominance operates at a Function level. This class +/// automatically flushes any remaining updates when destroyed. /// /// It is not necessary for the user to scrub the updates for duplicates or /// updates that point to the same block (Delete, BB_A, BB_A). Performance @@ -312,7 +314,9 @@ /// TerminatorInstructionBB->removeFromParent(); class DeferredDominance { public: - DeferredDominance(DominatorTree &DT_) : DT(DT_) {} + DeferredDominance(DominatorTree &DT_, PostDominatorTree *PDT_ = nullptr) + : DT(DT_), PDT(PDT_) {} + ~DeferredDominance(); /// \brief Queues multiple updates and discards duplicates. void applyUpdates(ArrayRef Updates); @@ -335,9 +339,17 @@ /// \brief Returns true if DelBB is awaiting deletion at a flush() event. bool pendingDeletedBB(BasicBlock *DelBB); - /// \brief Flushes all pending updates and block deletions. Returns a - /// correct DominatorTree reference to be used by the caller for analysis. - DominatorTree &flush(); + /// \brief Flushes all pending updates and block deletions. + void flush(); + + /// \brief Flushes and returns a correct DominatorTree reference to + /// be used by the caller for analysis. + DominatorTree &getDT(); + + /// \brief Flushes and returns a correct PostDominatorTree pointer to + /// be used by the caller for analysis. Returns nullptr if PDT is not + /// being held. + PostDominatorTree *getPDT(); /// \brief Drops all internal state and forces a (slow) recalculation of the /// DominatorTree based on the current state of the LLVM IR in F. This should @@ -349,6 +361,7 @@ private: DominatorTree &DT; + PostDominatorTree *PDT; SmallVector PendUpdates; SmallPtrSet DeletedBBs; Index: include/llvm/Support/GenericDomTree.h =================================================================== --- include/llvm/Support/GenericDomTree.h +++ include/llvm/Support/GenericDomTree.h @@ -671,15 +671,6 @@ } } - /// splitBlock - BB is split and now it has one successor. Update dominator - /// tree to reflect this change. - void splitBlock(NodeT *NewBB) { - if (IsPostDominator) - Split>(NewBB); - else - Split(NewBB); - } - /// print - Convert to human readable form /// void print(raw_ostream &O) const { @@ -788,63 +779,6 @@ SlowQueries = 0; } - // NewBB is split and now it has one successor. Update dominator tree to - // reflect this change. - template - void Split(typename GraphTraits::NodeRef NewBB) { - using GraphT = GraphTraits; - using NodeRef = typename GraphT::NodeRef; - assert(std::distance(GraphT::child_begin(NewBB), - GraphT::child_end(NewBB)) == 1 && - "NewBB should have a single successor!"); - NodeRef NewBBSucc = *GraphT::child_begin(NewBB); - - std::vector PredBlocks; - for (const auto &Pred : children>(NewBB)) - PredBlocks.push_back(Pred); - - assert(!PredBlocks.empty() && "No predblocks?"); - - bool NewBBDominatesNewBBSucc = true; - for (const auto &Pred : children>(NewBBSucc)) { - if (Pred != NewBB && !dominates(NewBBSucc, Pred) && - isReachableFromEntry(Pred)) { - NewBBDominatesNewBBSucc = false; - break; - } - } - - // Find NewBB's immediate dominator and create new dominator tree node for - // NewBB. - NodeT *NewBBIDom = nullptr; - unsigned i = 0; - for (i = 0; i < PredBlocks.size(); ++i) - if (isReachableFromEntry(PredBlocks[i])) { - NewBBIDom = PredBlocks[i]; - break; - } - - // It's possible that none of the predecessors of NewBB are reachable; - // in that case, NewBB itself is unreachable, so nothing needs to be - // changed. - if (!NewBBIDom) return; - - for (i = i + 1; i < PredBlocks.size(); ++i) { - if (isReachableFromEntry(PredBlocks[i])) - NewBBIDom = findNearestCommonDominator(NewBBIDom, PredBlocks[i]); - } - - // Create the new dominator tree node... and set the idom of NewBB. - DomTreeNodeBase *NewBBNode = addNewBlock(NewBB, NewBBIDom); - - // If NewBB strictly dominates other blocks, then it is now the immediate - // dominator of NewBBSucc. Update the dominator tree as appropriate. - if (NewBBDominatesNewBBSucc) { - DomTreeNodeBase *NewBBSuccNode = getNode(NewBBSucc); - changeImmediateDominator(NewBBSuccNode, NewBBNode); - } - } - private: bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, const DomTreeNodeBase *B) const { Index: include/llvm/Transforms/Utils/BasicBlockUtils.h =================================================================== --- include/llvm/Transforms/Utils/BasicBlockUtils.h +++ include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -194,10 +194,15 @@ /// It will have Suffix+".split_lp". See SplitLandingPadPredecessors for more /// details on this case. /// -/// This currently updates the LLVM IR, DominatorTree, LoopInfo, and LCCSA but -/// no other analyses. In particular, it does not preserve LoopSimplify -/// (because it's complicated to handle the case where one of the edges being -/// split is an exit of a loop with other exits). +/// This currently updates the LLVM IR, DominatorTree and PostDominatorTree +/// through DeferredDominance, LoopInfo, and LCCSA but no other analyses. In +/// particular, it does not preserve LoopSimplify (because it's complicated to +/// handle the case where one of the edges being split is an exit of a loop with +/// other exits). +BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, + const char *Suffix, DeferredDominance *DDT, + LoopInfo *LI = nullptr, + bool PreserveLCSSA = false); BasicBlock *SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, const char *Suffix, DominatorTree *DT = nullptr, @@ -219,7 +224,7 @@ ArrayRef Preds, const char *Suffix, const char *Suffix2, SmallVectorImpl &NewBBs, - DominatorTree *DT = nullptr, + DeferredDominance *DDT = nullptr, LoopInfo *LI = nullptr, bool PreserveLCSSA = false); Index: include/llvm/Transforms/Utils/LoopSimplify.h =================================================================== --- include/llvm/Transforms/Utils/LoopSimplify.h +++ include/llvm/Transforms/Utils/LoopSimplify.h @@ -56,7 +56,10 @@ /// /// This takes a potentially un-simplified loop L (and its children) and turns /// it into a simplified loop nest with preheaders and single backedges. It will -/// update \c AliasAnalysis and \c ScalarEvolution analyses if they're non-null. +/// update DominatorTree/PostDominatorTree through DeferredDominance, LoopInfo +/// and ScalarEvolution analyses if they're non-null. +bool simplifyLoop(Loop *L, DeferredDominance *DDT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA); bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA); Index: include/llvm/Transforms/Utils/LoopUtils.h =================================================================== --- include/llvm/Transforms/Utils/LoopUtils.h +++ include/llvm/Transforms/Utils/LoopUtils.h @@ -39,6 +39,7 @@ class Loop; class LoopInfo; class OptimizationRemarkEmitter; +struct PostDominatorTree; class PredicatedScalarEvolution; class PredIteratorCache; class ScalarEvolution; @@ -377,6 +378,8 @@ SmallVector RedundantCasts; }; +BasicBlock *InsertPreheaderForLoop(Loop *L, DeferredDominance *DDT, + LoopInfo *LI, bool PreserveLCSSA); BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); @@ -384,7 +387,10 @@ /// /// For any loop exit block with non-loop predecessors, we split the loop /// predecessors to use a dedicated loop exit block. We update the dominator -/// tree and loop info if provided, and will preserve LCSSA if requested. +/// tree/postdominator tree (though DeferredDominance) and loop info if +/// provided, and will preserve LCSSA if requested. +bool formDedicatedExitBlocks(Loop *L, DeferredDominance *DDT, LoopInfo *LI, + bool PreserveLCSSA); bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA); Index: lib/IR/Dominators.cpp =================================================================== --- lib/IR/Dominators.cpp +++ lib/IR/Dominators.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Instructions.h" @@ -394,6 +395,8 @@ // //===----------------------------------------------------------------------===// +DeferredDominance::~DeferredDominance() { flush(); } + /// \brief Queues multiple updates and discards duplicates. void DeferredDominance::applyUpdates( ArrayRef Updates) { @@ -448,19 +451,33 @@ return DeletedBBs.count(DelBB) != 0; } -/// \brief Flushes all pending updates and block deletions. Returns a -/// correct DominatorTree reference to be used by the caller for analysis. -DominatorTree &DeferredDominance::flush() { +/// \brief Flushes all pending updates and block deletions. +void DeferredDominance::flush() { // Updates to DT must happen before blocks are deleted below. Otherwise the // DT traversal will encounter badref blocks and assert. if (!PendUpdates.empty()) { DT.applyUpdates(PendUpdates); + if (PDT) + PDT->applyUpdates(PendUpdates); PendUpdates.clear(); } flushDelBB(); +} + +/// \brief Flushes and returns a correct DominatorTree reference to +/// be used by the caller for analysis. +DominatorTree &DeferredDominance::getDT() { + flush(); return DT; } +/// \brief Flushes and returns a correct PostDominatorTree reference to +/// be used by the caller for analysis. +PostDominatorTree *DeferredDominance::getPDT() { + flush(); + return PDT; +} + /// \brief Drops all internal state and forces a (slow) recalculation of the /// DominatorTree based on the current state of the LLVM IR in F. This should /// only be used in corner cases such as the Entry block of F being deleted. @@ -470,6 +487,8 @@ // actual DT. if (flushDelBB() || !PendUpdates.empty()) { DT.recalculate(F); + if (PDT) + PDT->recalculate(F); PendUpdates.clear(); } } @@ -567,8 +586,11 @@ bool DeferredDominance::flushDelBB() { if (DeletedBBs.empty()) return false; - for (auto *BB : DeletedBBs) + for (auto *BB : DeletedBBs) { + if (PDT && PDT->getNode(BB)) + PDT->eraseNode(BB); BB->eraseFromParent(); + } DeletedBBs.clear(); return true; } Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp =================================================================== --- lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5127,7 +5127,8 @@ .setDontDeleteUselessPHIs()); } else { SmallVector NewBBs; - SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DT, &LI); + DeferredDominance DDT(DT); + SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DDT, &LI); NewBB = NewBBs[0]; } // If NewBB==NULL, then SplitCriticalEdge refused to split because all Index: lib/Transforms/Utils/BasicBlockUtils.cpp =================================================================== --- lib/Transforms/Utils/BasicBlockUtils.cpp +++ lib/Transforms/Utils/BasicBlockUtils.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -316,17 +317,25 @@ /// Update DominatorTree, LoopInfo, and LCCSA analysis information. static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB, ArrayRef Preds, - DominatorTree *DT, LoopInfo *LI, + DeferredDominance *DDT, LoopInfo *LI, bool PreserveLCSSA, bool &HasLoopExit) { // Update dominator tree if available. - if (DT) - DT->splitBlock(NewBB); + if (DDT) { + std::vector Updates; + Updates.push_back({DominatorTree::UpdateKind::Insert, NewBB, OldBB}); + for (auto *Pred : Preds) { + Updates.push_back({DominatorTree::UpdateKind::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::UpdateKind::Delete, Pred, OldBB}); + } + DDT->applyUpdates(Updates); + } // The rest of the logic is only relevant for updating the loop structures. if (!LI) return; - assert(DT && "DT should be available to update LoopInfo!"); + assert(DDT && "DDT should be available to update LoopInfo!"); + DominatorTree &DT = DDT->getDT(); Loop *L = LI->getLoopFor(OldBB); // If we need to preserve loop analyses, collect some information about how @@ -338,7 +347,7 @@ // OldBB is a loop entry or if SplitMakesNewLoopHeader. Unreachable blocks // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader // as true and make the NewBB the header of some loop. This breaks LI. - if (!DT->isReachableFromEntry(Pred)) + if (!DT.isReachableFromEntry(Pred)) continue; // If we need to preserve LCSSA, determine if any of the preds is a loop // exit. @@ -461,8 +470,9 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, ArrayRef Preds, - const char *Suffix, DominatorTree *DT, - LoopInfo *LI, bool PreserveLCSSA) { + const char *Suffix, + DeferredDominance *DDT, LoopInfo *LI, + bool PreserveLCSSA) { // Do not attempt to split that which cannot be split. if (!BB->canSplitPredecessors()) return nullptr; @@ -473,7 +483,7 @@ SmallVector NewBBs; std::string NewName = std::string(Suffix) + ".split-lp"; - SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DT, + SplitLandingPadPredecessors(BB, Preds, Suffix, NewName.c_str(), NewBBs, DDT, LI, PreserveLCSSA); return NewBBs[0]; } @@ -509,7 +519,7 @@ // Update DominatorTree, LoopInfo, and LCCSA analysis information. bool HasLoopExit = false; - UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA, + UpdateAnalysisInformation(BB, NewBB, Preds, DDT, LI, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in BB with the values coming from NewBB. @@ -517,11 +527,23 @@ return NewBB; } +BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, + ArrayRef Preds, + const char *Suffix, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + if (DT) { + DeferredDominance DDT(*DT); + return SplitBlockPredecessors(BB, Preds, Suffix, &DDT, LI, PreserveLCSSA); + } + return SplitBlockPredecessors(BB, Preds, Suffix, (DeferredDominance *)nullptr, + LI, PreserveLCSSA); +} + void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB, ArrayRef Preds, const char *Suffix1, const char *Suffix2, SmallVectorImpl &NewBBs, - DominatorTree *DT, LoopInfo *LI, + DeferredDominance *DDT, LoopInfo *LI, bool PreserveLCSSA) { assert(OrigBB->isLandingPad() && "Trying to split a non-landing pad!"); @@ -547,7 +569,7 @@ } bool HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DT, LI, PreserveLCSSA, + UpdateAnalysisInformation(OrigBB, NewBB1, Preds, DDT, LI, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB1. @@ -583,7 +605,7 @@ // Update DominatorTree, LoopInfo, and LCCSA analysis information. HasLoopExit = false; - UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DT, LI, + UpdateAnalysisInformation(OrigBB, NewBB2, NewBB2Preds, DDT, LI, PreserveLCSSA, HasLoopExit); // Update the PHI nodes in OrigBB with the values coming from NewBB2. Index: lib/Transforms/Utils/BreakCriticalEdges.cpp =================================================================== --- lib/Transforms/Utils/BreakCriticalEdges.cpp +++ lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -282,8 +282,9 @@ } if (!LoopPreds.empty()) { assert(!DestBB->isEHPad() && "We don't split edges to EH pads!"); + DeferredDominance DDT(*DT); BasicBlock *NewExitBB = SplitBlockPredecessors( - DestBB, LoopPreds, "split", DT, LI, Options.PreserveLCSSA); + DestBB, LoopPreds, "split", &DDT, LI, Options.PreserveLCSSA); if (Options.PreserveLCSSA) createPHIsForSplitLoopExit(LoopPreds, NewExitBB, DestBB); } Index: lib/Transforms/Utils/LoopSimplify.cpp =================================================================== --- lib/Transforms/Utils/LoopSimplify.cpp +++ lib/Transforms/Utils/LoopSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/IR/CFG.h" @@ -114,7 +115,7 @@ /// preheader, this method is called to insert one. This method has two phases: /// preheader insertion and analysis updating. /// -BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DeferredDominance *DDT, LoopInfo *LI, bool PreserveLCSSA) { BasicBlock *Header = L->getHeader(); @@ -136,7 +137,7 @@ // Split out the loop pre-header. BasicBlock *PreheaderBB; - PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DDT, LI, PreserveLCSSA); if (!PreheaderBB) return nullptr; @@ -150,6 +151,15 @@ return PreheaderBB; } +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, bool PreserveLCSSA) { + if (DT) { + DeferredDominance DDT(*DT); + return InsertPreheaderForLoop(L, &DDT, LI, PreserveLCSSA); + } + return InsertPreheaderForLoop(L, (DeferredDominance *)nullptr, LI, + PreserveLCSSA); +} /// Add the specified block, and all of its predecessors, to the specified set, /// if it's not already in there. Stop predecessor traversal when we reach @@ -172,13 +182,14 @@ /// \brief The first part of loop-nestification is to find a PHI node that tells /// us how to partition the loops. -static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, +static PHINode *findPHIToPartitionLoops(Loop *L, DeferredDominance *DDT, AssumptionCache *AC) { const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + DominatorTree &DT = DDT->getDT(); for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ) { PHINode *PN = cast(I); ++I; - if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, &DT, AC})) { // This is a degenerate PHI already, don't modify it! PN->replaceAllUsesWith(V); PN->eraseFromParent(); @@ -215,7 +226,7 @@ /// created. /// static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, - DominatorTree *DT, LoopInfo *LI, + DeferredDominance *DDT, LoopInfo *LI, ScalarEvolution *SE, bool PreserveLCSSA, AssumptionCache *AC) { // Don't try to separate loops without a preheader. @@ -226,7 +237,7 @@ BasicBlock *Header = L->getHeader(); assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); - PHINode *PN = findPHIToPartitionLoops(L, DT, AC); + PHINode *PN = findPHIToPartitionLoops(L, DDT, AC); if (!PN) return nullptr; // No known way to partition. // Pull out all predecessors that have varying values in the loop. This @@ -251,7 +262,7 @@ SE->forgetLoop(L); BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", - DT, LI, PreserveLCSSA); + DDT, LI, PreserveLCSSA); // Make sure that NewBB is put someplace intelligent, which doesn't mess up // code layout too horribly. @@ -280,9 +291,10 @@ // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set BlocksInL; + DominatorTree &DT = DDT->getDT(); for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { BasicBlock *P = *PI; - if (DT->dominates(Header, P)) + if (DT.dominates(Header, P)) addBlockAndPredsToSet(P, Header, BlocksInL); } @@ -314,7 +326,7 @@ // Split edges to exit blocks from the inner loop, if they emerged in the // process of separating the outer one. - formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + formDedicatedExitBlocks(L, DDT, LI, PreserveLCSSA); if (PreserveLCSSA) { // Fix LCSSA form for L. Some values, which previously were only used inside @@ -323,9 +335,10 @@ // We don't need to form LCSSA recursively, because there cannot be uses // inside a newly created loop of defs from inner loops as those would // already be a use of an LCSSA phi node. - formLCSSA(*L, *DT, LI, SE); + DominatorTree &DT = DDT->getDT(); + formLCSSA(*L, DT, LI, SE); - assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) && + assert(NewOuter->isRecursivelyLCSSAForm(DT, *LI) && "LCSSA is broken after separating nested loops!"); } @@ -339,7 +352,8 @@ /// and have that block branch to the loop header. This ensures that loops /// have exactly one backedge. static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, - DominatorTree *DT, LoopInfo *LI) { + DeferredDominance *DDT, + LoopInfo *LI) { assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!"); // Get information about the loop @@ -452,14 +466,20 @@ L->addBasicBlockToLoop(BEBlock, *LI); // Update dominator information - DT->splitBlock(BEBlock); + std::vector Updates; + Updates.push_back({DominatorTree::UpdateKind::Insert, BEBlock, Header}); + for (auto *BB : BackedgeBlocks) { + Updates.push_back({DominatorTree::UpdateKind::Insert, BB, BEBlock}); + Updates.push_back({DominatorTree::UpdateKind::Delete, BB, Header}); + } + DDT->applyUpdates(Updates); return BEBlock; } /// \brief Simplify one loop and queue further loops for simplification. static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, - DominatorTree *DT, LoopInfo *LI, + DeferredDominance *DDT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA) { bool Changed = false; @@ -489,7 +509,7 @@ // Zap the dead pred's terminator and replace it with unreachable. TerminatorInst *TI = P->getTerminator(); - changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA); + changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA, DDT); Changed = true; } } @@ -521,7 +541,7 @@ // Does the loop already have a preheader? If so, don't insert one. BasicBlock *Preheader = L->getLoopPreheader(); if (!Preheader) { - Preheader = InsertPreheaderForLoop(L, DT, LI, PreserveLCSSA); + Preheader = InsertPreheaderForLoop(L, DDT, LI, PreserveLCSSA); if (Preheader) Changed = true; } @@ -530,7 +550,7 @@ // predecessors that are inside of the loop. This check guarantees that the // loop preheader/header will dominate the exit blocks. If the exit block has // predecessors from outside of the loop, split the edge now. - if (formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA)) + if (formDedicatedExitBlocks(L, DDT, LI, PreserveLCSSA)) Changed = true; // If the header has more than two predecessors at this point (from the @@ -541,8 +561,8 @@ // this for loops with a giant number of backedges, just factor them into a // common backedge instead. if (L->getNumBackEdges() < 8) { - if (Loop *OuterL = - separateNestedLoop(L, Preheader, DT, LI, SE, PreserveLCSSA, AC)) { + if (Loop *OuterL = separateNestedLoop(L, Preheader, DDT, LI, SE, + PreserveLCSSA, AC)) { ++NumNested; // Enqueue the outer loop as it should be processed next in our // depth-first nest walk. @@ -559,12 +579,13 @@ // If we either couldn't, or didn't want to, identify nesting of the loops, // insert a new block that all backedges target, then make it jump to the // loop header. - LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI); + LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DDT, LI); if (LoopLatch) Changed = true; } const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + DominatorTree *DT = DDT ? &DDT->getDT() : nullptr; // Scan over the PHI nodes in the loop header. Since they now have only two // incoming values (the loop is canonicalized), we may have simplified the PHI @@ -607,7 +628,8 @@ if (HasUniqueExitBlock()) { for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (!ExitingBlock->getSinglePredecessor()) continue; + BasicBlock *Predecessor = ExitingBlock->getSinglePredecessor(); + if (!Predecessor) continue; BranchInst *BI = dyn_cast(ExitingBlock->getTerminator()); if (!BI || !BI->isConditional()) continue; CmpInst *CI = dyn_cast(BI->getCondition()); @@ -640,6 +662,26 @@ } if (!AllInvariant) continue; + // Get a list of required Dominator Tree edge updates + std::vector DTUpdates; + TerminatorInst *PredecessorTerm = Predecessor->getTerminator(); + BasicBlock *PredOtherBB = ExitingBlock == PredecessorTerm->getSuccessor(0) + ? PredecessorTerm->getNumSuccessors() == 1 + ? nullptr + : PredecessorTerm->getSuccessor(1) + : PredecessorTerm->getSuccessor(0); + BasicBlock *OtherBB = PredOtherBB == BI->getSuccessor(0) + ? BI->getSuccessor(1) + : BI->getSuccessor(0); + DTUpdates.push_back( + {DominatorTree::UpdateKind::Insert, Predecessor, OtherBB}); + DTUpdates.push_back( + {DominatorTree::UpdateKind::Delete, Predecessor, ExitingBlock}); + DTUpdates.push_back({DominatorTree::UpdateKind::Delete, ExitingBlock, + BI->getSuccessor(0)}); + DTUpdates.push_back({DominatorTree::UpdateKind::Delete, ExitingBlock, + BI->getSuccessor(1)}); + // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. @@ -662,27 +704,20 @@ Changed = true; LI->removeBlock(ExitingBlock); - DomTreeNode *Node = DT->getNode(ExitingBlock); - const std::vector *> &Children = - Node->getChildren(); - while (!Children.empty()) { - DomTreeNode *Child = Children.front(); - DT->changeImmediateDominator(Child, Node->getIDom()); - } - DT->eraseNode(ExitingBlock); - BI->getSuccessor(0)->removePredecessor( ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); BI->getSuccessor(1)->removePredecessor( ExitingBlock, /* DontDeleteUselessPHIs */ PreserveLCSSA); - ExitingBlock->eraseFromParent(); + + DDT->deleteBB(ExitingBlock); + DDT->applyUpdates(DTUpdates); } } return Changed; } -bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, +bool llvm::simplifyLoop(Loop *L, DeferredDominance *DDT, LoopInfo *LI, ScalarEvolution *SE, AssumptionCache *AC, bool PreserveLCSSA) { bool Changed = false; @@ -691,9 +726,10 @@ // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA // form. if (PreserveLCSSA) { - assert(DT && "DT not available."); + assert(DDT && "DT not available."); + DominatorTree &DT = DDT->getDT(); assert(LI && "LI not available."); - assert(L->isRecursivelyLCSSAForm(*DT, *LI) && + assert(L->isRecursivelyLCSSAForm(DT, *LI) && "Requested to preserve LCSSA, but it's already broken."); } #endif @@ -711,12 +747,23 @@ } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE, + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DDT, LI, SE, AC, PreserveLCSSA); return Changed; } +bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, AssumptionCache *AC, + bool PreserveLCSSA) { + if (DT) { + DeferredDominance DDT(*DT); + return simplifyLoop(L, &DDT, LI, SE, AC, PreserveLCSSA); + } + return simplifyLoop(L, (DeferredDominance *)nullptr, LI, SE, AC, + PreserveLCSSA); +} + namespace { struct LoopSimplify : public FunctionPass { static char ID; // Pass identification, replacement for typeid @@ -732,6 +779,7 @@ // We need loop information to identify the loops... AU.addRequired(); AU.addPreserved(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); @@ -771,16 +819,21 @@ bool Changed = false; LoopInfo *LI = &getAnalysis().getLoopInfo(); DominatorTree *DT = &getAnalysis().getDomTree(); + auto *PDTWP = getAnalysisIfAvailable(); + PostDominatorTree *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr; auto *SEWP = getAnalysisIfAvailable(); ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; AssumptionCache *AC = &getAnalysis().getAssumptionCache(F); bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + DeferredDominance DDT(*DT, PDT); // Simplify each loop nest in the function. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, PreserveLCSSA); + Changed |= simplifyLoop(*I, &DDT, LI, SE, AC, PreserveLCSSA); + + DDT.flush(); #ifndef NDEBUG if (PreserveLCSSA) { @@ -789,6 +842,7 @@ assert(InLCSSA && "LCSSA is broken after loop-simplify."); } #endif + return Changed; } @@ -797,19 +851,22 @@ bool Changed = false; LoopInfo *LI = &AM.getResult(F); DominatorTree *DT = &AM.getResult(F); + PostDominatorTree *PDT = AM.getCachedResult(F); ScalarEvolution *SE = AM.getCachedResult(F); AssumptionCache *AC = &AM.getResult(F); + DeferredDominance DDT(*DT, PDT); // Note that we don't preserve LCSSA in the new PM, if you need it run LCSSA // after simplifying the loops. for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, SE, AC, /*PreserveLCSSA*/ false); + Changed |= simplifyLoop(*I, &DDT, LI, SE, AC, /*PreserveLCSSA*/ false); if (!Changed) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); PA.preserve(); PA.preserve(); PA.preserve(); Index: lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- lib/Transforms/Utils/LoopUtils.cpp +++ lib/Transforms/Utils/LoopUtils.cpp @@ -1064,8 +1064,8 @@ return true; } -bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, - bool PreserveLCSSA) { +bool llvm::formDedicatedExitBlocks(Loop *L, DeferredDominance *DDT, + LoopInfo *LI, bool PreserveLCSSA) { bool Changed = false; // We re-use a vector for the in-loop predecesosrs. @@ -1097,7 +1097,7 @@ return false; auto *NewExitBB = SplitBlockPredecessors( - BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA); + BB, InLoopPredecessors, ".loopexit", DDT, LI, PreserveLCSSA); if (!NewExitBB) DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " @@ -1127,6 +1127,16 @@ return Changed; } +bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, + bool PreserveLCSSA) { + if (DT) { + DeferredDominance DDT(*DT); + return formDedicatedExitBlocks(L, &DDT, LI, PreserveLCSSA); + } + return formDedicatedExitBlocks(L, (DeferredDominance *)nullptr, LI, + PreserveLCSSA); +} + /// \brief Returns the instructions that use values defined in the loop. SmallVector llvm::findDefsUsedOutsideOfLoop(Loop *L) { SmallVector UsedOutside; Index: test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll =================================================================== --- test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll +++ test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -sroa -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info +; RUN: opt < %s -sroa -postdomtree -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info define void @inflate() { entry: Index: test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll =================================================================== --- test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll +++ test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -domfrontier -loop-simplify -domfrontier -verify-dom-info -analyze +; RUN: opt < %s -postdomtree -domfrontier -loop-simplify -domfrontier -verify-dom-info -analyze define void @a() nounwind { Index: test/Transforms/LoopSimplify/indirectbr.ll =================================================================== --- test/Transforms/LoopSimplify/indirectbr.ll +++ test/Transforms/LoopSimplify/indirectbr.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S \ +; RUN: opt < %s -postdomtree -loop-simplify -lcssa -verify-loop-info -verify-dom-info -S \ ; RUN: | grep -F "indirectbr i8* %x, [label %L0, label %L1]" \ ; RUN: | count 6 Index: test/Transforms/LoopSimplify/merge-exits.ll =================================================================== --- test/Transforms/LoopSimplify/merge-exits.ll +++ test/Transforms/LoopSimplify/merge-exits.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s +; RUN: opt < %s -postdomtree -loop-simplify -loop-rotate -instcombine -indvars -S -verify-loop-info -verify-dom-info | FileCheck %s ; Loopsimplify should be able to merge the two loop exits ; into one, so that loop rotate can rotate the loop, so Index: test/Transforms/LoopSimplify/preserve-pdt.ll =================================================================== --- /dev/null +++ test/Transforms/LoopSimplify/preserve-pdt.ll @@ -0,0 +1,36 @@ +; RUN: opt -passes="require,loop-simplify,print" -verify-dom-info -S %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; CHECK: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK: [1] <> {4294967295,4294967295} [0] +; CHECK: [2] %end {4294967295,4294967295} [1] +; CHECK: [3] %cleanup14 {4294967295,4294967295} [2] +; CHECK: [4] %f {4294967295,4294967295} [3] +; CHECK: [5] %entry {4294967295,4294967295} [4] +; CHECK: [4] %for.end.split {4294967295,4294967295} [3] +; CHECK: [2] %for.inc {4294967295,4294967295} [1] +; CHECK: Roots: %end %for.inc + +define void @e() local_unnamed_addr { +entry: + br label %f + +f: + br i1 undef, label %for.end.split, label %cleanup14 + +for.inc: + switch i4 undef, label %cleanup14 [ + i4 0, label %for.end.split + i4 -8, label %for.end.split + ] + +for.end.split: + br label %cleanup14 + +cleanup14: + br i1 undef, label %f, label %end + +end: + ret void +} Index: test/Transforms/LoopSimplify/unreachable-loop-pred.ll =================================================================== --- test/Transforms/LoopSimplify/unreachable-loop-pred.ll +++ test/Transforms/LoopSimplify/unreachable-loop-pred.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -loop-simplify -disable-output -verify-loop-info -verify-dom-info < %s +; RUN: opt -S -postdomtree -loop-simplify -disable-output -verify-loop-info -verify-dom-info < %s ; PR5235 ; When loopsimplify inserts a preheader for this loop, it should add the new Index: unittests/IR/DeferredDominanceTest.cpp =================================================================== --- unittests/IR/DeferredDominanceTest.cpp +++ unittests/IR/DeferredDominanceTest.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/PostDominators.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -52,8 +53,10 @@ // Make the DDT. DominatorTree DT(*F); - DeferredDominance DDT(DT); - ASSERT_TRUE(DDT.flush().verify()); + PostDominatorTree PDT(*F); + DeferredDominance DDT(DT, &PDT); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); Function::iterator FI = F->begin(); BasicBlock *BB0 = &*FI++; @@ -101,7 +104,8 @@ // Verify. Updates to DDT must be applied *after* all changes to the CFG // (including block deletion). DDT.applyUpdates(Updates); - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); } TEST(DeferredDominance, PairedUpdate) { @@ -127,8 +131,10 @@ // Make the DDT. DominatorTree DT(*F); - DeferredDominance DDT(DT); - ASSERT_TRUE(DDT.flush().verify()); + PostDominatorTree PDT(*F); + DeferredDominance DDT(DT, &PDT); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); Function::iterator FI = F->begin(); BasicBlock *BB0 = &*FI++; @@ -166,7 +172,8 @@ // The DT has no changes, this flush() simply returns a reference to the // internal DT calculated at the beginning of this test. - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); } TEST(DeferredDominance, ReplaceEntryBB) { @@ -186,8 +193,10 @@ // Make the DDT. DominatorTree DT(*F); - DeferredDominance DDT(DT); - ASSERT_TRUE(DDT.flush().verify()); + PostDominatorTree PDT(*F); + DeferredDominance DDT(DT, &PDT); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); Function::iterator FI = F->begin(); BasicBlock *BB0 = &*FI++; @@ -207,7 +216,8 @@ // Changing the Entry BB requires a full recalulation. DDT.recalculate(*F); - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); // CFG Change: remove new_edge -> bb0 and redirect to new_edge -> bb1. EXPECT_EQ(NewEntry->getTerminator()->getNumSuccessors(), 1u); @@ -219,7 +229,8 @@ // Child of F. DDT.applyUpdates({{DominatorTree::Delete, NewEntry, BB0}, {DominatorTree::Insert, NewEntry, BB1}}); - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); // Now remove bb0 from F. ASSERT_FALSE(isa(BB0->getTerminator())); @@ -233,7 +244,8 @@ // do this to test the case when there are no pending DT updates but there are // pending deleted BBs. DDT.recalculate(*F); - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); } TEST(DeferredDominance, InheritedPreds) { @@ -261,8 +273,10 @@ // Make the DDT. DominatorTree DT(*F); - DeferredDominance DDT(DT); - ASSERT_TRUE(DDT.flush().verify()); + PostDominatorTree PDT(*F); + DeferredDominance DDT(DT, &PDT); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); Function::iterator FI = F->begin(); BasicBlock *BB0 = &*FI++; @@ -340,5 +354,6 @@ // Verify everything. DDT.applyUpdates(Updates); - ASSERT_TRUE(DDT.flush().verify()); + ASSERT_TRUE(DDT.getDT().verify()); + ASSERT_TRUE(DDT.getPDT()->verify()); } Index: unittests/Transforms/Scalar/LoopPassManagerTest.cpp =================================================================== --- unittests/Transforms/Scalar/LoopPassManagerTest.cpp +++ unittests/Transforms/Scalar/LoopPassManagerTest.cpp @@ -10,6 +10,7 @@ #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -299,6 +300,7 @@ // We need DominatorTreeAnalysis for LoopAnalysis. FAM.registerPass([&] { return DominatorTreeAnalysis(); }); + FAM.registerPass([&] { return PostDominatorTreeAnalysis(); }); FAM.registerPass([&] { return LoopAnalysis(); }); // We also allow loop passes to assume a set of other analyses and so need // those. Index: unittests/Transforms/Utils/Local.cpp =================================================================== --- unittests/Transforms/Utils/Local.cpp +++ unittests/Transforms/Utils/Local.cpp @@ -322,7 +322,7 @@ ConstantFoldTerminator(BB, true, nullptr, &DDT); } - EXPECT_TRUE(DDT.flush().verify()); + EXPECT_TRUE(DDT.getDT().verify()); }; runWithDomTree(*M, "br_same_dest", CFAllTerminators);