Index: llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp =================================================================== --- llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -1,4 +1,4 @@ -//===- MergedLoadStoreMotion.cpp - merge and hoist/sink load/stores -------===// +//===- MergedLoadStoreMotion.cpp - merge and sink stores ------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,20 +7,18 @@ //===----------------------------------------------------------------------===// // //! \file -//! This pass performs merges of loads and stores on both sides of a -// diamond (hammock). It hoists the loads and sinks the stores. +//! This pass performs merges of stores on both sides of a +// diamond (hammock). // -// The algorithm iteratively hoists two loads to the same address out of a -// diamond (hammock) and merges them into a single load in the header. Similar -// it sinks and merges two stores to the tail block (footer). The algorithm -// iterates over the instructions of one side of the diamond and attempts to -// find a matching load/store on the other side. New tail/footer block may be -// insterted if the tail/footer block has more predecessors (not only the two -// predecessors that are forming the diamond). It hoists / sinks when it thinks -// it safe to do so. This optimization helps with eg. hiding load latencies, -// triggering if-conversion, and reducing static code size. -// -// NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. +// The algorithm iteratively sinks two stores to the same address out of a +// diamond (hammock) and merges them into a single store to the tail block +// (footer). The algorithm iterates over the instructions of one side of the +// diamond and attempts to find a matching store on the other side. New +// tail/footer block may be insterted if the tail/footer block has more +// predecessors (not only the two predecessors that are forming the diamond). It +// sinks when it thinks it safe to do so. This optimization helps with eg. +// hiding load latencies, triggering if-conversion, and reducing static code +// size. // //===----------------------------------------------------------------------===// // @@ -34,10 +32,9 @@ // + + // + + // if.then: if.else: -// %lt = load %addr_l %le = load %addr_l -// // <...> <...> // store %st, %addr_s store %se, %addr_s +// <...> <...> // br label %if.end br label %if.end // + + // + + @@ -48,13 +45,11 @@ // Diamond shaped code after merge: // // header: -// %l = load %addr_l // br %cond, label %if.then, label %if.else // + + // + + // + + // if.then: if.else: -// // <...> <...> // br label %if.end br label %if.end // + + @@ -100,8 +95,8 @@ class MergedLoadStoreMotion { AliasAnalysis *AA = nullptr; - // The mergeLoad/Store algorithms could have Size0 * Size1 complexity, - // where Size0 and Size1 are the #instructions on the two sides of + // The mergeStores algorithms could have Size0 * Size1 complexity, + // where Size0 and Size1 are the number instructions on the two sides of // the diamond. The constant chosen here is arbitrary. Compiler Time // Control is enforced by the check Size0 * Size1 < MagicCompileTimeControl. const int MagicCompileTimeControl = 250; @@ -109,11 +104,11 @@ const bool SplitFooterBB; public: MergedLoadStoreMotion(bool SplitFooterBB) : SplitFooterBB(SplitFooterBB) {} - bool run(Function &F, AliasAnalysis &AA); + std::pair run(Function &F, AliasAnalysis &AA); private: - BasicBlock *getDiamondTail(BasicBlock *BB); - bool isDiamondHead(BasicBlock *BB); + bool isDiamondHead(BasicBlock *BB, BasicBlock *&Left, BasicBlock *&Right, + BasicBlock *&Tail); // Routines for sinking stores StoreInst *canSinkFromBlock(BasicBlock *BB, StoreInst *SI); PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); @@ -122,44 +117,36 @@ bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); - bool mergeStores(BasicBlock *BB); + std::pair mergeStores(BasicBlock *Head, BasicBlock *Left, + BasicBlock *Right, BasicBlock *Tail); }; } // end anonymous namespace -/// -/// Return tail block of a diamond. -/// -BasicBlock *MergedLoadStoreMotion::getDiamondTail(BasicBlock *BB) { - assert(isDiamondHead(BB) && "Basic block is not head of a diamond"); - return BB->getTerminator()->getSuccessor(0)->getSingleSuccessor(); -} - /// /// True when BB is the head of a diamond (hammock) /// -bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB) { +bool MergedLoadStoreMotion::isDiamondHead(BasicBlock *BB, BasicBlock *&Left, + BasicBlock *&Right, + BasicBlock *&Tail) { if (!BB) return false; auto *BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; - BasicBlock *Succ0 = BI->getSuccessor(0); - BasicBlock *Succ1 = BI->getSuccessor(1); + Left = BI->getSuccessor(0); + Right = BI->getSuccessor(1); - if (!Succ0->getSinglePredecessor()) + if (Left == Right) return false; - if (!Succ1->getSinglePredecessor()) + if (!Left->getSinglePredecessor()) return false; - - BasicBlock *Succ0Succ = Succ0->getSingleSuccessor(); - BasicBlock *Succ1Succ = Succ1->getSingleSuccessor(); - // Ignore triangles. - if (!Succ0Succ || !Succ1Succ || Succ0Succ != Succ1Succ) + if (!Right->getSinglePredecessor()) return false; - return true; -} + Tail = Left->getSingleSuccessor(); + return Tail && Tail == Right->getSingleSuccessor(); +} /// /// True when instruction is a sink barrier for a store @@ -280,86 +267,83 @@ /// Starting from a diamond head block, iterate over the instructions in one /// successor block and try to match a store in the second successor. /// -bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { - +std::pair MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB, + BasicBlock *LeftBB, + BasicBlock *RightBB, + BasicBlock *TailBB) { bool MergedStores = false; - BasicBlock *TailBB = getDiamondTail(HeadBB); + bool SplitFooter = false; BasicBlock *SinkBB = TailBB; - assert(SinkBB && "Footer of a diamond cannot be empty"); - - succ_iterator SI = succ_begin(HeadBB); - assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors"); - BasicBlock *Pred0 = *SI; - ++SI; - assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor"); - BasicBlock *Pred1 = *SI; - // tail block of a diamond/hammock? - if (Pred0 == Pred1) - return false; // No. - // bail out early if we can not merge into the footer BB + // Bail out early if we can not merge into the footer BB. if (!SplitFooterBB && TailBB->hasNPredecessorsOrMore(3)) - return false; - // #Instructions in Pred1 for Compile Time Control - auto InstsNoDbg = Pred1->instructionsWithoutDebug(); + return {false, false}; + // #Instructions in LeftBB for Compile Time Control + auto InstsNoDbg = RightBB->instructionsWithoutDebug(); int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end()); int NStores = 0; - for (BasicBlock::reverse_iterator RBI = Pred0->rbegin(), RBE = Pred0->rend(); - RBI != RBE;) { - - Instruction *I = &*RBI; - ++RBI; + Instruction *P = LeftBB->getTerminator(); + while (Instruction *I = P->getPrevNonDebugInstruction()) { + assert(P->getParent() == LeftBB); // Don't sink non-simple (atomic, volatile) stores. auto *S0 = dyn_cast(I); - if (!S0 || !S0->isSimple()) + if (!S0 || !S0->isSimple()) { + P = I; continue; + } ++NStores; if (NStores * Size1 >= MagicCompileTimeControl) break; - if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - if (!canSinkStoresAndGEPs(S0, S1)) - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - break; + StoreInst *S1 = canSinkFromBlock(RightBB, S0); + if (!S1) { + P = I; + continue; + } + if (!canSinkStoresAndGEPs(S0, S1)) + // Don't attempt to sink below stores that had to stick around + break; - if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) { - // We have more than 2 predecessors. Insert a new block - // postdominating 2 predecessors we're going to sink from. - SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split"); - if (!SinkBB) - break; - } - - MergedStores = true; - sinkStoresAndGEPs(SinkBB, S0, S1); - RBI = Pred0->rbegin(); - RBE = Pred0->rend(); - LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); + if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) { + // We have more than 2 predecessors. Insert a new block + // postdominating 2 predecessors we're going to sink from. + SinkBB = SplitBlockPredecessors(TailBB, {LeftBB, RightBB}, ".sink.split"); + if (!SinkBB) + break; + SplitFooter = true; } + + MergedStores = true; + sinkStoresAndGEPs(SinkBB, S0, S1); } - return MergedStores; + return {MergedStores, SplitFooter}; } -bool MergedLoadStoreMotion::run(Function &F, AliasAnalysis &AA) { +std::pair MergedLoadStoreMotion::run(Function &F, + AliasAnalysis &AA) { this->AA = &AA; bool Changed = false; + bool CFGChanged = false; + LLVM_DEBUG(dbgs() << "Instruction Merger\n"); // Merge unconditional branches, allowing PRE to catch more // optimization opportunities. // This loop doesn't care about newly inserted/split blocks // since they never will be diamond heads. - for (BasicBlock &BB : make_early_inc_range(F)) - // Hoist equivalent loads and sink stores - // outside diamonds when possible - if (isDiamondHead(&BB)) - Changed |= mergeStores(&BB); - return Changed; + for (BasicBlock &BB : make_early_inc_range(F)) { + BasicBlock *Left, *Right, *Tail; + if (isDiamondHead(&BB, Left, Right, Tail)) { + // Merge and sink store pairs outside diamonds when possible. + bool Change, ChangeCFG; + std::tie(Change, ChangeCFG) = mergeStores(&BB, Left, Right, Tail); + Changed |= Change; + CFGChanged |= ChangeCFG; + } + } + return {Changed, CFGChanged}; } namespace { @@ -380,7 +364,10 @@ if (skipFunction(F)) return false; MergedLoadStoreMotion Impl(SplitFooterBB); - return Impl.run(F, getAnalysis().getAAResults()); + bool Change, _; + std::tie(Change, _) = + Impl.run(F, getAnalysis().getAAResults()); + return Change; } private: @@ -412,11 +399,13 @@ MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) { MergedLoadStoreMotion Impl(Options.SplitFooterBB); auto &AA = AM.getResult(F); - if (!Impl.run(F, AA)) + bool Change, CFGChange; + std::tie(Change, CFGChange) = Impl.run(F, AA); + if (!Change && !CFGChange) return PreservedAnalyses::all(); PreservedAnalyses PA; - if (!Options.SplitFooterBB) + if (!CFGChange) PA.preserveSet(); return PA; }