diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp --- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp +++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp @@ -14,9 +14,11 @@ // diamond (hammock) and merges them into a single load in the header. Similar // it sinks and merges two stores to the tail block (footer). The algorithm // iterates over the instructions of one side of the diamond and attempts to -// find a matching load/store on the other side. It hoists / sinks when it -// thinks it safe to do so. This optimization helps with eg. hiding load -// latencies, triggering if-conversion, and reducing static code size. +// find a matching load/store on the other side. New tail/footer block may be +// insterted if the tail/footer block has more predecessors (not only the two +// predecessors that are forming the diamond). It hoists / sinks when it thinks +// it safe to do so. This optimization helps with eg. hiding load latencies, +// triggering if-conversion, and reducing static code size. // // NOTE: This code no longer performs load hoisting, it is subsumed by GVNHoist. // @@ -114,7 +116,9 @@ PHINode *getPHIOperand(BasicBlock *BB, StoreInst *S0, StoreInst *S1); bool isStoreSinkBarrierInRange(const Instruction &Start, const Instruction &End, MemoryLocation Loc); - bool sinkStore(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst); + bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const; + void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand, + StoreInst *ElseInst); bool mergeStores(BasicBlock *BB); }; } // end anonymous namespace @@ -217,74 +221,79 @@ } /// +/// Check if 2 stores can be sunk together with corresponding GEPs +/// +bool MergedLoadStoreMotion::canSinkStoresAndGEPs(StoreInst *S0, + StoreInst *S1) const { + auto *A0 = dyn_cast(S0->getPointerOperand()); + auto *A1 = dyn_cast(S1->getPointerOperand()); + return A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && + (A0->getParent() == S0->getParent()) && A1->hasOneUse() && + (A1->getParent() == S1->getParent()) && isa(A0); +} + +/// /// Merge two stores to same address and sink into \p BB /// /// Also sinks GEP instruction computing the store address /// -bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0, - StoreInst *S1) { +void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0, + StoreInst *S1) { // Only one definition? auto *A0 = dyn_cast(S0->getPointerOperand()); auto *A1 = dyn_cast(S1->getPointerOperand()); - if (A0 && A1 && A0->isIdenticalTo(A1) && A0->hasOneUse() && - (A0->getParent() == S0->getParent()) && A1->hasOneUse() && - (A1->getParent() == S1->getParent()) && isa(A0)) { - LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); - dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; - dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); - // Hoist the instruction. - BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); - // Intersect optional metadata. - S0->andIRFlags(S1); - S0->dropUnknownNonDebugMetadata(); - - // Create the new store to be inserted at the join point. - StoreInst *SNew = cast(S0->clone()); - Instruction *ANew = A0->clone(); - SNew->insertBefore(&*InsertPt); - ANew->insertBefore(SNew); - - assert(S0->getParent() == A0->getParent()); - assert(S1->getParent() == A1->getParent()); - - // New PHI operand? Use it. - if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) - SNew->setOperand(0, NewPN); - S0->eraseFromParent(); - S1->eraseFromParent(); - A0->replaceAllUsesWith(ANew); - A0->eraseFromParent(); - A1->replaceAllUsesWith(ANew); - A1->eraseFromParent(); - return true; - } - return false; + LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump(); + dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n"; + dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n"); + // Hoist the instruction. + BasicBlock::iterator InsertPt = BB->getFirstInsertionPt(); + // Intersect optional metadata. + S0->andIRFlags(S1); + S0->dropUnknownNonDebugMetadata(); + + // Create the new store to be inserted at the join point. + StoreInst *SNew = cast(S0->clone()); + Instruction *ANew = A0->clone(); + SNew->insertBefore(&*InsertPt); + ANew->insertBefore(SNew); + + assert(S0->getParent() == A0->getParent()); + assert(S1->getParent() == A1->getParent()); + + // New PHI operand? Use it. + if (PHINode *NewPN = getPHIOperand(BB, S0, S1)) + SNew->setOperand(0, NewPN); + S0->eraseFromParent(); + S1->eraseFromParent(); + A0->replaceAllUsesWith(ANew); + A0->eraseFromParent(); + A1->replaceAllUsesWith(ANew); + A1->eraseFromParent(); } /// /// True when two stores are equivalent and can sink into the footer /// -/// Starting from a diamond tail block, iterate over the instructions in one -/// predecessor block and try to match a store in the second predecessor. +/// Starting from a diamond head block, iterate over the instructions in one +/// successor block and try to match a store in the second successor. /// -bool MergedLoadStoreMotion::mergeStores(BasicBlock *T) { +bool MergedLoadStoreMotion::mergeStores(BasicBlock *HeadBB) { bool MergedStores = false; - assert(T && "Footer of a diamond cannot be empty"); - - pred_iterator PI = pred_begin(T), E = pred_end(T); - assert(PI != E); - BasicBlock *Pred0 = *PI; - ++PI; - BasicBlock *Pred1 = *PI; - ++PI; + BasicBlock *TailBB = getDiamondTail(HeadBB); + BasicBlock *SinkBB = TailBB; + assert(SinkBB && "Footer of a diamond cannot be empty"); + + succ_iterator SI = succ_begin(HeadBB); + assert(SI != succ_end(HeadBB) && "Diamond head cannot have zero successors"); + BasicBlock *Pred0 = *SI; + ++SI; + assert(SI != succ_end(HeadBB) && "Diamond head cannot have single successor"); + BasicBlock *Pred1 = *SI; // tail block of a diamond/hammock? if (Pred0 == Pred1) return false; // No. - if (PI != E) - return false; // No. More than 2 predecessors. - - // #Instructions in Succ1 for Compile Time Control + // #Instructions in Pred1 for Compile Time Control auto InstsNoDbg = Pred1->instructionsWithoutDebug(); int Size1 = std::distance(InstsNoDbg.begin(), InstsNoDbg.end()); int NStores = 0; @@ -304,14 +313,23 @@ if (NStores * Size1 >= MagicCompileTimeControl) break; if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) { - bool Res = sinkStore(T, S0, S1); - MergedStores |= Res; - // Don't attempt to sink below stores that had to stick around - // But after removal of a store and some of its feeding - // instruction search again from the beginning since the iterator - // is likely stale at this point. - if (!Res) + if (!canSinkStoresAndGEPs(S0, S1)) + // Don't attempt to sink below stores that had to stick around + // But after removal of a store and some of its feeding + // instruction search again from the beginning since the iterator + // is likely stale at this point. break; + + if (SinkBB == TailBB && TailBB->hasNPredecessorsOrMore(3)) { + // We have more than 2 predecessors. Insert a new block + // postdominating 2 predecessors we're going to sink from. + SinkBB = SplitBlockPredecessors(TailBB, {Pred0, Pred1}, ".sink.split"); + if (!SinkBB) + break; + } + + MergedStores = true; + sinkStoresAndGEPs(SinkBB, S0, S1); RBI = Pred0->rbegin(); RBE = Pred0->rend(); LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump()); @@ -334,7 +352,7 @@ // Hoist equivalent loads and sink stores // outside diamonds when possible if (isDiamondHead(BB)) { - Changed |= mergeStores(getDiamondTail(BB)); + Changed |= mergeStores(BB); } } return Changed; @@ -361,7 +379,6 @@ private: void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); } @@ -391,7 +408,6 @@ return PreservedAnalyses::all(); PreservedAnalyses PA; - PA.preserveSet(); PA.preserve(); return PA; } diff --git a/llvm/test/Transforms/InstMerge/st_sink_split_bb.ll b/llvm/test/Transforms/InstMerge/st_sink_split_bb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstMerge/st_sink_split_bb.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; Test to make sure that a new block is inserted if we +; have more than 2 predecessors for the block we're going to sink to. +; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +; Function Attrs: nounwind uwtable +define dso_local void @st_sink_split_bb(i32* nocapture %arg, i32* nocapture %arg1, i1 zeroext %arg2, i1 zeroext %arg3) local_unnamed_addr { +; CHECK-LABEL: @st_sink_split_bb( +; CHECK-NEXT: bb: +; CHECK-NEXT: br i1 [[ARG2:%.*]], label [[BB4:%.*]], label [[BB5:%.*]] +; CHECK: bb4: +; CHECK-NEXT: store i32 1, i32* [[ARG:%.*]], align 4 +; CHECK-NEXT: br label [[BB9:%.*]] +; CHECK: bb5: +; CHECK-NEXT: br i1 [[ARG3:%.*]], label [[BB6:%.*]], label [[BB7:%.*]] +; CHECK: bb6: +; CHECK-NEXT: store i32 2, i32* [[ARG]], align 4 +; CHECK-NEXT: br label [[BB9_SINK_SPLIT:%.*]] +; CHECK: bb7: +; CHECK-NEXT: store i32 3, i32* [[ARG]], align 4 +; CHECK-NEXT: br label [[BB9_SINK_SPLIT]] +; CHECK: bb9.sink.split: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 1 +; CHECK-NEXT: store i32 3, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[ARG1]], i64 2 +; CHECK-NEXT: store i32 3, i32* [[TMP1]], align 4 +; CHECK-NEXT: br label [[BB9]] +; CHECK: bb9: +; CHECK-NEXT: ret void +; +bb: + br i1 %arg2, label %bb4, label %bb5 + +bb4: ; preds = %bb + store i32 1, i32* %arg, align 4 + br label %bb9 + +bb5: ; preds = %bb + br i1 %arg3, label %bb6, label %bb7 + +bb6: ; preds = %bb5 + store i32 2, i32* %arg, align 4 + %tmp1 = getelementptr inbounds i32, i32* %arg1, i64 1 + store i32 3, i32* %tmp1, align 4 + %tmp = getelementptr inbounds i32, i32* %arg1, i64 2 + store i32 3, i32* %tmp, align 4 + br label %bb9 + +bb7: ; preds = %bb5 + store i32 3, i32* %arg, align 4 + %tmp2 = getelementptr inbounds i32, i32* %arg1, i64 1 + store i32 3, i32* %tmp2, align 4 + %tmp8 = getelementptr inbounds i32, i32* %arg1, i64 2 + store i32 3, i32* %tmp8, align 4 + br label %bb9 + + +bb9: ; preds = %bb7, %bb6, %bb4 + ret void +}