diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -121,6 +121,8 @@
   bool canSinkStoresAndGEPs(StoreInst *S0, StoreInst *S1) const;
   void sinkStoresAndGEPs(BasicBlock *BB, StoreInst *SinkCand,
                          StoreInst *ElseInst);
+  bool canSinkStoresWithSameGEPs(StoreInst *S0, StoreInst *S1) const;
+  void sinkStores(BasicBlock *BB, StoreInst *SinkCand, StoreInst *ElseInst);
   bool mergeStores(BasicBlock *BB);
 };
 } // end anonymous namespace
@@ -244,9 +246,9 @@
   // Only one definition?
   auto *A0 = dyn_cast<Instruction>(S0->getPointerOperand());
   auto *A1 = dyn_cast<Instruction>(S1->getPointerOperand());
-  LLVM_DEBUG(dbgs() << "Sink Instruction into BB \n"; BB->dump();
-             dbgs() << "Instruction Left\n"; S0->dump(); dbgs() << "\n";
-             dbgs() << "Instruction Right\n"; S1->dump(); dbgs() << "\n");
+  LLVM_DEBUG(dbgs() << "Sink Stores and GEPs into BB \n"; BB->dump();
+             dbgs() << "Store Left\n"; S0->dump(); dbgs() << "\n";
+             dbgs() << "Store Right\n"; S1->dump(); dbgs() << "\n");
   // Hoist the instruction.
   BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
   // Intersect optional metadata.
@@ -274,6 +276,40 @@
 }
 
 ///
+/// Check if 2 stores can be sunk if they have the same GEP
+///
+bool MergedLoadStoreMotion::canSinkStoresWithSameGEPs(StoreInst *S0,
+                                                      StoreInst *S1) const {
+  auto *A0 = dyn_cast<GetElementPtrInst>(S0->getPointerOperand());
+  auto *A1 = dyn_cast<GetElementPtrInst>(S1->getPointerOperand());
+  return A0 && A1 && A0 == A1;
+}
+
+///
+/// Merge two stores to same address and sink into \p BB
+///
+void MergedLoadStoreMotion::sinkStores(BasicBlock *BB, StoreInst *S0,
+                                       StoreInst *S1) {
+  LLVM_DEBUG(dbgs() << "Sink Stores into BB \n"; BB->dump();
+             dbgs() << "Store Left\n"; S0->dump(); dbgs() << "\n";
+             dbgs() << "Store Right\n"; S1->dump(); dbgs() << "\n");
+  BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
+  // Intersect optional metadata.
+  S0->andIRFlags(S1);
+  S0->dropUnknownNonDebugMetadata();
+
+  // Create the new store to be inserted at the join point.
+  StoreInst *SNew = cast<StoreInst>(S0->clone());
+  SNew->insertBefore(&*InsertPt);
+
+  // New PHI operand? Use it.
+  if (PHINode *NewPN = getPHIOperand(BB, S0, S1))
+    SNew->setOperand(0, NewPN);
+  S0->eraseFromParent();
+  S1->eraseFromParent();
+}
+
+///
 /// True when two stores are equivalent and can sink into the footer
 ///
 /// Starting from a diamond head block, iterate over the instructions in one
@@ -318,7 +354,9 @@
     if (NStores * Size1 >= MagicCompileTimeControl)
       break;
     if (StoreInst *S1 = canSinkFromBlock(Pred1, S0)) {
-      if (!canSinkStoresAndGEPs(S0, S1))
+      bool SinkWithGEPs = canSinkStoresAndGEPs(S0, S1);
+      bool SinkOnlyStores = canSinkStoresWithSameGEPs(S0, S1);
+      if (!SinkWithGEPs && !SinkOnlyStores)
         // Don't attempt to sink below stores that had to stick around
         // But after removal of a store and some of its feeding
         // instruction search again from the beginning since the iterator
@@ -334,7 +372,10 @@
       }
 
       MergedStores = true;
-      sinkStoresAndGEPs(SinkBB, S0, S1);
+      if (SinkWithGEPs)
+        sinkStoresAndGEPs(SinkBB, S0, S1);
+      else if (SinkOnlyStores)
+        sinkStores(SinkBB, S0, S1);
       RBI = Pred0->rbegin();
       RBE = Pred0->rend();
       LLVM_DEBUG(dbgs() << "Search again\n"; Instruction *I = &*RBI; I->dump());
@@ -351,7 +392,7 @@
 
   // Merge unconditional branches, allowing PRE to catch more
   // optimization opportunities.
-  // This loop doesn't care about newly inserted/split blocks 
+  // This loop doesn't care about newly inserted/split blocks
   // since they never will be diamond heads.
   for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) {
     BasicBlock *BB = &*FI++;
diff --git a/llvm/test/Transforms/InstMerge/st_sink_no_geps.ll b/llvm/test/Transforms/InstMerge/st_sink_no_geps.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstMerge/st_sink_no_geps.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Test to make sure that we sink stores if they have common GEP.
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<memdep>,mldst-motion' -S < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: nounwind uwtable
+define dso_local void @st_sink_no_geps(i32* nocapture %arg1, i1 zeroext %arg2) local_unnamed_addr {
+; CHECK-LABEL: @st_sink_no_geps(
+; CHECK-NEXT:  bb1:
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 1
+; CHECK-NEXT:    br i1 [[ARG2:%.*]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK:       bb2:
+; CHECK-NEXT:    br label [[BB4:%.*]]
+; CHECK:       bb3:
+; CHECK-NEXT:    br label [[BB4]]
+; CHECK:       bb4:
+; CHECK-NEXT:    store i32 42, i32* [[TMP]], align 4
+; CHECK-NEXT:    ret void
+;
+bb1:
+  %tmp = getelementptr inbounds i32, i32* %arg1, i64 1
+  br i1 %arg2, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+  store i32 42, i32* %tmp, align 4
+  br label %bb4
+
+bb3:                                              ; preds = %bb1
+  store i32 42, i32* %tmp, align 4
+  br label %bb4
+
+bb4:                                              ; preds = %bb2, %bb3
+  ret void
+}