Index: llvm/include/llvm/Transforms/Utils/SSAUpdater.h =================================================================== --- llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -169,6 +169,10 @@ /// Called to update debug info associated with the instruction. virtual void updateDebugInfo(Instruction *I) const {} + + /// Return false if a sub-class wants to keep one of the loads/stores + /// after the SSA construction. + virtual bool shouldDelete(Instruction *I) const { return true; } }; } // end namespace llvm Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -1860,6 +1860,7 @@ bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; + bool CanInsertStoresInExitBlocks; // We're about to add a use of V in a loop exit block. Insert an LCSSA phi // (if legal) if doing so would add an out-of-loop use to an instruction @@ -1886,12 +1887,13 @@ SmallVectorImpl &MSSAIP, PredIteratorCache &PIC, MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, Align Alignment, bool UnorderedAtomic, const AAMDNodes &AATags, - ICFLoopSafetyInfo &SafetyInfo) + ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), Alignment(Alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), - SafetyInfo(SafetyInfo) {} + SafetyInfo(SafetyInfo), + CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {} bool isInstInList(Instruction *I, const SmallVectorImpl &) const override { @@ -1903,7 +1905,7 @@ return PointerMustAliases.count(Ptr); } - void doExtraRewritesBeforeFinalDeletion() override { + void insertStoresInLoopExitBlocks() { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. Since we've already told // the SSA updater about the defs in the loop and the preheader @@ -1937,10 +1939,21 @@ } } + void doExtraRewritesBeforeFinalDeletion() override { + if (CanInsertStoresInExitBlocks) + insertStoresInLoopExitBlocks(); + } + void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); MSSAU->removeMemoryAccess(I); } + + bool shouldDelete(Instruction *I) const override { + if (isa(I)) + return CanInsertStoresInExitBlocks; + return true; + } }; bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L, @@ -2039,6 +2052,7 @@ bool DereferenceableInPH = false; bool SafeToInsertStore = false; + bool FoundLoadToPromote = false; SmallVector LoopUses; @@ -2086,6 +2100,7 @@ SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); + FoundLoadToPromote = true; Align InstAlignment = Load->getAlign(); @@ -2197,13 +2212,20 @@ } } - // If we've still failed to prove we can sink the store, give up. - if (!SafeToInsertStore) + // If we've still failed to prove we can sink the store, hoist the load + // only, if possible. + if (!SafeToInsertStore && !FoundLoadToPromote) + // If we cannot hoist the load either, give up. return false; - // Otherwise, this is safe to promote, lets do it! - LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr - << '\n'); + // Lets do the promotion! + if (SafeToInsertStore) + LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr + << '\n'); + else + LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr + << '\n'); + ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) @@ -2222,7 +2244,8 @@ SSAUpdater SSA(&NewPHIs); LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, - Alignment, SawUnorderedAtomic, AATags, *SafetyInfo); + Alignment, SawUnorderedAtomic, AATags, *SafetyInfo, + SafeToInsertStore); // Set up the preheader to have a definition of the value. It is the live-out // value from the preheader that uses in the loop will use. Index: llvm/lib/Transforms/Utils/SSAUpdater.cpp =================================================================== --- llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -446,6 +446,9 @@ // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { + if (!shouldDelete(User)) + continue; + // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll =================================================================== --- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll +++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll @@ -5,12 +5,12 @@ ; RUN: opt -O2 -S < %s | FileCheck %s ; CHECK-LABEL: main -; CHECK: if.end -; CHECK: store ; CHECK: memset ; CHECK: if.then ; CHECK: store -; CHECK: memset +; CHECK: if.end +; CHECK: store +; CHECK: store @d = common global i32 0, align 4 @b = common global i32 0, align 4 Index: llvm/test/Transforms/LICM/hoist-load-without-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/hoist-load-without-store.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S < %s | FileCheck %s + +;; C reproducer: +;; void f(int *ptr, int n) { +;; for (int i = 0; i < n; ++i) { +;; int x = *ptr; +;; if (x) +;; break; +;; +;; *ptr = x + 1; +;; } +;; } + +define dso_local void @f(i32* nocapture %ptr, i32 %n) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP1_CRIT_EDGE:%.*]] +; CHECK: for.body.cleanup1_crit_edge: +; CHECK-NEXT: br label [[CLEANUP1]] +; CHECK: for.cond.cleanup1_crit_edge: +; CHECK-NEXT: br label [[CLEANUP1]] +; CHECK: cleanup1: +; CHECK-NEXT: ret void +; +entry: + %cmp7 = icmp slt i32 0, %n + br i1 %cmp7, label %for.body.lr.ph, label %cleanup1 + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %if.end + %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ] + %0 = load i32, i32* %ptr, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %for.body.cleanup1_crit_edge + +if.end: ; preds = %for.body + store i32 1, i32* %ptr, align 4 + %inc = add nuw nsw i32 %i.08, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup1_crit_edge + +for.body.cleanup1_crit_edge: ; preds = %for.body + br label %cleanup1 + +for.cond.cleanup1_crit_edge: ; preds = %if.end + br label %cleanup1 + +cleanup1: ; preds = %for.cond.cleanup1_crit_edge, %for.body.cleanup1_crit_edge, %entry + ret void +} Index: llvm/test/Transforms/LICM/promote-capture.ll =================================================================== --- llvm/test/Transforms/LICM/promote-capture.ll +++ llvm/test/Transforms/LICM/promote-capture.ll @@ -111,17 +111,19 @@ ; CHECK-NEXT: [[COUNT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 0, i32* [[COUNT]], align 4 ; CHECK-NEXT: call void @capture(i32* [[COUNT]]) +; CHECK-NEXT: [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[COND:%.*]] = call i1 @cond(i32 [[I]]) ; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[LATCH]] ; CHECK: if: -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[COUNT]], align 4 -; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C]], 1 +; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C_INC2]], 1 ; CHECK-NEXT: store i32 [[C_INC]], i32* [[COUNT]], align 4 ; CHECK-NEXT: br label [[LATCH]] ; CHECK: latch: +; CHECK-NEXT: [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] Index: llvm/test/Transforms/LICM/scalar-promote-memmodel.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote-memmodel.ll +++ llvm/test/Transforms/LICM/scalar-promote-memmodel.ll @@ -11,19 +11,21 @@ ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B:%.*]], 0 +; CHECK-NEXT: [[G_PROMOTED:%.*]] = load i32, i32* @g, align 4 ; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC5:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[INC2:%.*]] = phi i32 [ [[G_PROMOTED]], [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC5:%.*]], [[FOR_INC]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: ; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_INC]], label [[IF_THEN:%.*]] ; CHECK: if.then: -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* @g, align 4 -; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[INC2]], 1 ; CHECK-NEXT: store i32 [[INC]], i32* @g, align 4 ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: +; CHECK-NEXT: [[INC1]] = phi i32 [ [[INC]], [[IF_THEN]] ], [ [[INC2]], [[FOR_BODY]] ] ; CHECK-NEXT: [[INC5]] = add nsw i32 [[I_0]], 1 ; CHECK-NEXT: br label [[FOR_COND]] ; CHECK: for.end: Index: llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll +++ llvm/test/Transforms/LICM/scalar-promote-opaque-ptrs.ll @@ -314,17 +314,19 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(ptr [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, ptr [[LOCAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[LOCAL]], align 4 -; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]]) +; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: store i32 [[X2]], ptr [[LOCAL]], align 4 ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: +; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] Index: llvm/test/Transforms/LICM/scalar-promote.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote.ll +++ llvm/test/Transforms/LICM/scalar-promote.ll @@ -315,17 +315,19 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[LOCAL]], align 4 -; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X]]) +; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] ; CHECK: if: ; CHECK-NEXT: store i32 [[X2]], i32* [[LOCAL]], align 4 ; CHECK-NEXT: br label [[ELSE]] ; CHECK: else: +; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] ; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 ; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]