Index: llvm/include/llvm/Transforms/Utils/SSAUpdater.h =================================================================== --- llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -169,6 +169,10 @@ /// Called to update debug info associated with the instruction. virtual void updateDebugInfo(Instruction *I) const {} + + /// Used to indicate that we want to keep the store as is (since it was not + /// safe for sinking), by hoisting load only. + virtual bool shouldDelete(Instruction *I) const { return true; } }; } // end namespace llvm Index: llvm/lib/Transforms/Scalar/LICM.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LICM.cpp +++ llvm/lib/Transforms/Scalar/LICM.cpp @@ -465,8 +465,8 @@ for (const SmallSetVector &PointerMustAliases : collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( - PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, - LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE); + PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, + DT, TLI, L, &MSSAU, &SafetyInfo, ORE); } Promoted |= LocalPromoted; } while (LocalPromoted); @@ -1858,6 +1858,7 @@ bool UnorderedAtomic; AAMDNodes AATags; ICFLoopSafetyInfo &SafetyInfo; + bool CanInsertStoresInExitBlocks; // We're about to add a use of V in a loop exit block. 
Insert an LCSSA phi // (if legal) if doing so would add an out-of-loop use to an instruction @@ -1884,12 +1885,13 @@ SmallVectorImpl<MemoryAccess *> &MSSAIP, PredIteratorCache &PIC, MemorySSAUpdater *MSSAU, LoopInfo &li, DebugLoc dl, int alignment, bool UnorderedAtomic, const AAMDNodes &AATags, - ICFLoopSafetyInfo &SafetyInfo) + ICFLoopSafetyInfo &SafetyInfo, bool CanInsertStoresInExitBlocks) : LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA), LoopExitBlocks(LEB), LoopInsertPts(LIP), MSSAInsertPts(MSSAIP), PredCache(PIC), MSSAU(MSSAU), LI(li), DL(std::move(dl)), Alignment(alignment), UnorderedAtomic(UnorderedAtomic), AATags(AATags), - SafetyInfo(SafetyInfo) {} + SafetyInfo(SafetyInfo), + CanInsertStoresInExitBlocks(CanInsertStoresInExitBlocks) {} bool isInstInList(Instruction *I, const SmallVectorImpl<Instruction *> &) const override { @@ -1901,7 +1903,7 @@ return PointerMustAliases.count(Ptr); } - void doExtraRewritesBeforeFinalDeletion() override { + void insertStoresInLoopExitBlocks() { // Insert stores after in the loop exit blocks. Each exit block gets a // store of the live-out values that feed them. 
Since we've already told // the SSA updater about the defs in the loop and the preheader @@ -1935,10 +1937,21 @@ } } + void doExtraRewritesBeforeFinalDeletion() override { + if (CanInsertStoresInExitBlocks) + insertStoresInLoopExitBlocks(); + } + void instructionDeleted(Instruction *I) const override { SafetyInfo.removeInstruction(I); MSSAU->removeMemoryAccess(I); } + + bool shouldDelete(Instruction *I) const override { + if (isa<StoreInst>(I)) + return CanInsertStoresInExitBlocks; + return true; + } }; bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L, @@ -2037,6 +2050,7 @@ bool DereferenceableInPH = false; bool SafeToInsertStore = false; + bool FoundLoadToPromote = false; SmallVector<Instruction *, 64> LoopUses; @@ -2089,6 +2103,7 @@ SawUnorderedAtomic |= Load->isAtomic(); SawNotAtomic |= !Load->isAtomic(); + FoundLoadToPromote = true; Align InstAlignment = Load->getAlign(); @@ -2197,13 +2212,20 @@ } } - // If we've still failed to prove we can sink the store, give up. - if (!SafeToInsertStore) + // If we've still failed to prove we can sink the store, hoist the load + // only, if possible. + if (!SafeToInsertStore && !FoundLoadToPromote) + // If we cannot hoist the load either, give up. return false; - // Otherwise, this is safe to promote, lets do it! - LLVM_DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr - << '\n'); + // Let's do the promotion! + if (SafeToInsertStore) + LLVM_DEBUG(dbgs() << "LICM: Promoting load/store of the value: " << *SomePtr + << '\n'); + else + LLVM_DEBUG(dbgs() << "LICM: Promoting load of the value: " << *SomePtr + << '\n'); + ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PromoteLoopAccessesToScalar", LoopUses[0]) @@ -2223,7 +2245,7 @@ LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, MSSAU, *LI, DL, Alignment.value(), SawUnorderedAtomic, AATags, - *SafetyInfo); + *SafetyInfo, SafeToInsertStore); // Set up the preheader to have a definition of the value. 
It is the live-out // value from the preheader that uses in the loop will use. Index: llvm/lib/Transforms/Utils/SSAUpdater.cpp =================================================================== --- llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -446,6 +446,9 @@ // Now that everything is rewritten, delete the old instructions from the // function. They should all be dead now. for (Instruction *User : Insts) { + if (!shouldDelete(User)) + continue; + // If this is a load that still has uses, then the load must have been added // as a live value in the SSAUpdate data structure for a block (e.g. because // the loaded value was stored later). In this case, we need to recursively Index: llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll =================================================================== --- llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll +++ llvm/test/Transforms/InstMerge/st_sink_bugfix_22613.ll @@ -5,12 +5,12 @@ ; RUN: opt -O2 -S < %s | FileCheck %s ; CHECK-LABEL: main -; CHECK: if.end -; CHECK: store ; CHECK: memset ; CHECK: if.then ; CHECK: store -; CHECK: memset +; CHECK: if.end +; CHECK: store +; CHECK: store @d = common global i32 0, align 4 @b = common global i32 0, align 4 Index: llvm/test/Transforms/LICM/hoist-load-without-store.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LICM/hoist-load-without-store.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -licm -S < %s | FileCheck %s + +;; C reproducer: +;; void f(int *ptr, int n) { +;; for (int i = 0; i < n; ++i) { +;; int x = *ptr; +;; if (x) +;; break; +;; +;; *ptr = x + 1; +;; } +;; } + +define dso_local void @f(i32* nocapture %ptr, i32 %n) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_LR_PH:%.*]], label [[CLEANUP1:%.*]] 
+; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[PTR_PROMOTED:%.*]] = load i32, i32* [[PTR:%.*]], align 4 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[PTR_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ 1, [[IF_END:%.*]] ] +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[IF_END]] ] +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[IF_END]], label [[FOR_BODY_CLEANUP1_CRIT_EDGE:%.*]] +; CHECK: if.end: +; CHECK-NEXT: store i32 1, i32* [[PTR]], align 4 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP1_CRIT_EDGE:%.*]] +; CHECK: for.body.cleanup1_crit_edge: +; CHECK-NEXT: br label [[CLEANUP1]] +; CHECK: for.cond.cleanup1_crit_edge: +; CHECK-NEXT: br label [[CLEANUP1]] +; CHECK: cleanup1: +; CHECK-NEXT: ret void +; +entry: + %cmp7 = icmp slt i32 0, %n + br i1 %cmp7, label %for.body.lr.ph, label %cleanup1 + +for.body.lr.ph: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %if.end + %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %if.end ] + %0 = load i32, i32* %ptr, align 4 + %tobool.not = icmp eq i32 %0, 0 + br i1 %tobool.not, label %if.end, label %for.body.cleanup1_crit_edge + +if.end: ; preds = %for.body + store i32 1, i32* %ptr, align 4 + %inc = add nuw nsw i32 %i.08, 1 + %cmp = icmp slt i32 %inc, %n + br i1 %cmp, label %for.body, label %for.cond.cleanup1_crit_edge + +for.body.cleanup1_crit_edge: ; preds = %for.body + br label %cleanup1 + +for.cond.cleanup1_crit_edge: ; preds = %if.end + br label %cleanup1 + +cleanup1: ; preds = %for.cond.cleanup1_crit_edge, %for.body.cleanup1_crit_edge, %entry + ret void +} Index: llvm/test/Transforms/LICM/promote-capture.ll =================================================================== --- llvm/test/Transforms/LICM/promote-capture.ll +++ 
llvm/test/Transforms/LICM/promote-capture.ll @@ -111,17 +111,19 @@ ; CHECK-NEXT: [[COUNT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store i32 0, i32* [[COUNT]], align 4 ; CHECK-NEXT: call void @capture(i32* [[COUNT]]) +; CHECK-NEXT: [[COUNT_PROMOTED:%.*]] = load i32, i32* [[COUNT]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[C_INC2:%.*]] = phi i32 [ [[COUNT_PROMOTED]], [[ENTRY:%.*]] ], [ [[C_INC1:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[I_NEXT:%.*]], [[LATCH]] ] ; CHECK-NEXT: [[COND:%.*]] = call i1 @cond(i32 [[I]]) ; CHECK-NEXT: br i1 [[COND]], label [[IF:%.*]], label [[LATCH]] ; CHECK: if: -; CHECK-NEXT: [[C:%.*]] = load i32, i32* [[COUNT]], align 4 -; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C]], 1 +; CHECK-NEXT: [[C_INC:%.*]] = add i32 [[C_INC2]], 1 ; CHECK-NEXT: store i32 [[C_INC]], i32* [[COUNT]], align 4 ; CHECK-NEXT: br label [[LATCH]] ; CHECK: latch: +; CHECK-NEXT: [[C_INC1]] = phi i32 [ [[C_INC]], [[IF]] ], [ [[C_INC2]], [[LOOP]] ] ; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I_NEXT]], [[LEN:%.*]] ; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]] Index: llvm/test/Transforms/LICM/scalar-promote-memmodel.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote-memmodel.ll +++ llvm/test/Transforms/LICM/scalar-promote-memmodel.ll @@ -26,7 +26,7 @@ br label %for.inc ; CHECK: load i32, i32* -; CHECK-NEXT: add +; CHECK: add ; CHECK-NEXT: store i32 for.inc: ; preds = %for.body, %if.then Index: llvm/test/Transforms/LICM/scalar-promote.ll =================================================================== --- llvm/test/Transforms/LICM/scalar-promote.ll +++ llvm/test/Transforms/LICM/scalar-promote.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; 
RUN: opt < %s -basic-aa -tbaa -licm -S | FileCheck %s ; RUN: opt -aa-pipeline=tbaa,basic-aa -passes='require,require,require,require,loop-mssa(licm)' -S %s | FileCheck %s target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" @@ -5,12 +6,24 @@ @X = global i32 7 ; [#uses=4] define void @test1(i32 %i) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, i32* @X, align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] +; CHECK: Out: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* @X, align 4 +; CHECK-NEXT: ret void +; Entry: br label %Loop -; CHECK-LABEL: @test1( -; CHECK: Entry: -; CHECK-NEXT: load i32, i32* @X -; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 @@ -24,20 +37,25 @@ Out: ret void -; CHECK: Out: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X -; CHECK-NEXT: ret void } define void @test2(i32 %i) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds (i32, i32* @X, i64 1), align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[V1:%.*]] = phi i32 [ [[V:%.*]], [[LOOP]] ], [ [[DOTPROMOTED]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[V]] = add i32 [[V1]], 1 +; CHECK-NEXT: br i1 false, label [[LOOP]], label [[EXIT:%.*]] +; CHECK: Exit: +; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i32 [ [[V]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[V_LCSSA]], i32* getelementptr inbounds (i32, i32* @X, 
i64 1), align 4 +; CHECK-NEXT: ret void +; Entry: br label %Loop -; CHECK-LABEL: @test2( -; CHECK: Entry: -; CHECK-NEXT: %.promoted = load i32, i32* getelementptr inbounds (i32, i32* @X, i64 1) -; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 %X1 = getelementptr i32, i32* @X, i64 1 ; [#uses=1] @@ -49,26 +67,29 @@ Exit: ; preds = %Loop ret void -; CHECK: Exit: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %V -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* getelementptr inbounds (i32, i32* @X, i64 1) -; CHECK-NEXT: ret void } define void @test3(i32 %i) { ; CHECK-LABEL: @test3( +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X:%.*]] = load volatile i32, i32* @X, align 4 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: store i32 [[X2]], i32* @X, align 4 +; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]] +; CHECK: Out: +; CHECK-NEXT: ret void +; br label %Loop Loop: - ; Should not promote this to a register + ; Should not promote this to a register %x = load volatile i32, i32* @X %x2 = add i32 %x, 1 store i32 %x2, i32* @X br i1 true, label %Out, label %Loop -; CHECK: Loop: -; CHECK-NEXT: load volatile Out: ; preds = %Loop ret void @@ -76,12 +97,18 @@ define void @test3b(i32 %i) { ; CHECK-LABEL: @test3b( -; CHECK-LABEL: Loop: -; CHECK: store volatile -; CHECK-LABEL: Out: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* @X, align 4 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: store volatile i32 [[X2]], i32* @X, align 4 +; CHECK-NEXT: br i1 true, label [[OUT:%.*]], label [[LOOP]] +; CHECK: Out: +; CHECK-NEXT: ret void +; br label %Loop Loop: - ; Should not promote this to a register + ; Should not promote this to a register %x = load i32, i32* @X %x2 = add i32 %x, 1 store volatile i32 %x2, i32* @X @@ -94,6 +121,32 @@ ; PR8041 define void @test4(i8* %x, i8 %n) { ; CHECK-LABEL: @test4( +; CHECK-NEXT: [[HANDLE1:%.*]] = alloca i8*, align 8 +; CHECK-NEXT: [[HANDLE2:%.*]] = 
alloca i8*, align 8 +; CHECK-NEXT: store i8* [[X:%.*]], i8** [[HANDLE1]], align 8 +; CHECK-NEXT: [[TMP:%.*]] = getelementptr i8, i8* [[X]], i64 8 +; CHECK-NEXT: [[OFFSETX1:%.*]] = load i8*, i8** [[HANDLE1]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br label [[SUBLOOP:%.*]] +; CHECK: subloop: +; CHECK-NEXT: [[NEWOFFSETX21:%.*]] = phi i8* [ [[TMP]], [[LOOP]] ], [ [[NEWOFFSETX2:%.*]], [[SUBLOOP]] ] +; CHECK-NEXT: [[COUNT:%.*]] = phi i8 [ 0, [[LOOP]] ], [ [[NEXTCOUNT:%.*]], [[SUBLOOP]] ] +; CHECK-NEXT: store i8 [[N:%.*]], i8* [[NEWOFFSETX21]], align 1 +; CHECK-NEXT: [[NEWOFFSETX2]] = getelementptr i8, i8* [[NEWOFFSETX21]], i64 -1 +; CHECK-NEXT: [[NEXTCOUNT]] = add i8 [[COUNT]], 1 +; CHECK-NEXT: [[INNEREXITCOND:%.*]] = icmp sge i8 [[NEXTCOUNT]], 8 +; CHECK-NEXT: br i1 [[INNEREXITCOND]], label [[INNEREXIT:%.*]], label [[SUBLOOP]] +; CHECK: innerexit: +; CHECK-NEXT: [[NEWOFFSETX2_LCSSA:%.*]] = phi i8* [ [[NEWOFFSETX2]], [[SUBLOOP]] ] +; CHECK-NEXT: [[VAL:%.*]] = load i8, i8* [[OFFSETX1]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[VAL]], [[N]] +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[NEWOFFSETX2_LCSSA_LCSSA:%.*]] = phi i8* [ [[NEWOFFSETX2_LCSSA]], [[INNEREXIT]] ] +; CHECK-NEXT: store i8* [[NEWOFFSETX2_LCSSA_LCSSA]], i8** [[HANDLE2]], align 8 +; CHECK-NEXT: ret void +; %handle1 = alloca i8* %handle2 = alloca i8* store i8* %x, i8** %handle1 @@ -115,12 +168,6 @@ br i1 %innerexitcond, label %innerexit, label %subloop ; Should have promoted 'handle2' accesses. -; CHECK: subloop: -; CHECK-NEXT: phi i8* [ -; CHECK-NEXT: %count = phi i8 [ -; CHECK-NEXT: store i8 %n -; CHECK-NOT: store -; CHECK: br i1 innerexit: %offsetx1 = load i8*, i8** %handle1 @@ -129,22 +176,31 @@ br i1 %cond, label %exit, label %loop ; Should not have promoted offsetx1 loads. 
-; CHECK: innerexit: -; CHECK: %val = load i8, i8* %offsetx1 -; CHECK: %cond = icmp eq i8 %val, %n -; CHECK: br i1 %cond, label %exit, label %loop exit: ret void } define void @test5(i32 %i, i32** noalias %P2) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, i32* @X, align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 +; CHECK-NEXT: store atomic i32* @X, i32** [[P2:%.*]] monotonic, align 8 +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] +; CHECK: Out: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* @X, align 4 +; CHECK-NEXT: ret void +; Entry: br label %Loop -; CHECK-LABEL: @test5( -; CHECK: Entry: -; CHECK-NEXT: load i32, i32* @X -; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 @@ -153,7 +209,7 @@ %x2 = add i32 %x, 1 ; [#uses=1] store i32 %x2, i32* @X - store atomic i32* @X, i32** %P2 monotonic, align 8 + store atomic i32* @X, i32** %P2 monotonic, align 8 %Next = add i32 %j, 1 ; [#uses=2] %cond = icmp eq i32 %Next, 0 ; [#uses=1] @@ -161,16 +217,36 @@ Out: ret void -; CHECK: Out: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X -; CHECK-NEXT: ret void } ; PR14753 - Preserve TBAA tags when promoting values in a loop. 
define void @test6(i32 %n, float* nocapture %a, i32* %gi) { +; CHECK-LABEL: @test6( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 0, i32* [[GI:%.*]], align 4, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.lr.ph: +; CHECK-NEXT: [[GI_PROMOTED:%.*]] = load i32, i32* [[GI]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INC1:%.*]] = phi i32 [ [[GI_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[STOREMERGE2:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC]], [[FOR_BODY]] ] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STOREMERGE2]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[IDXPROM]] +; CHECK-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !tbaa [[TBAA4:![0-9]+]] +; CHECK-NEXT: [[INC]] = add nsw i32 [[INC1]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.cond.for.end_crit_edge: +; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ] +; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* [[GI]], align 4, !tbaa [[TBAA0]] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: store i32 0, i32* %gi, align 4, !tbaa !0 %cmp1 = icmp slt i32 0, %n @@ -196,11 +272,6 @@ for.end: ; preds = %for.cond.for.end_crit_edge, %entry ret void -; CHECK: for.body.lr.ph: -; CHECK-NEXT: %gi.promoted = load i32, i32* %gi, align 4, !tbaa !0 -; CHECK: for.cond.for.end_crit_edge: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %inc -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %gi, align 4, !tbaa !0 } declare i32 @opaque(i32) argmemonly @@ -209,16 +280,24 @@ ; We can promote even if opaque may throw. 
define i32 @test7() { ; CHECK-LABEL: @test7( -; CHECK: entry: -; CHECK-NEXT: %local = alloca -; CHECK-NEXT: call void @capture(i32* %local) -; CHECK-NEXT: load i32, i32* %local -; CHECK-NEXT: br label %loop -; CHECK: exit: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ] -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local -; CHECK-NEXT: %ret = load i32, i32* %local -; CHECK-NEXT: ret i32 %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]]) +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* [[LOCAL]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %local = alloca i32 call void @capture(i32* %local) @@ -241,19 +320,32 @@ ; Make sure we don't promote if the store is really control-flow dependent. 
define i32 @test7bad() { ; CHECK-LABEL: @test7bad( -; CHECK: entry: -; CHECK-NEXT: %local = alloca -; CHECK-NEXT: call void @capture(i32* %local) -; CHECK-NEXT: br label %loop -; CHECK: if: -; CHECK-NEXT: store i32 %x2, i32* %local -; CHECK-NEXT: br label %else -; CHECK: exit: -; CHECK-NEXT: %ret = load i32, i32* %local -; CHECK-NEXT: ret i32 %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X22:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X21:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[X2:%.*]] = call i32 @opaque(i32 [[X22]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X2]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] +; CHECK: if: +; CHECK-NEXT: store i32 [[X2]], i32* [[LOCAL]], align 4 +; CHECK-NEXT: br label [[ELSE]] +; CHECK: else: +; CHECK-NEXT: [[X21]] = phi i32 [ [[X2]], [[IF]] ], [ [[X22]], [[LOOP]] ] +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %local = alloca i32 - call void @capture(i32* %local) + call void @capture(i32* %local) br label %loop loop: %j = phi i32 [ 0, %entry ], [ %next, %else ] @@ -262,7 +354,7 @@ %cmp = icmp eq i32 %x2, 0 br i1 %cmp, label %if, label %else -if: +if: store i32 %x2, i32* %local br label %else @@ -281,25 +373,34 @@ ; doesn't block us, because %local is always dereferenceable. 
define i32 @test8() { ; CHECK-LABEL: @test8( -; CHECK: entry: -; CHECK-NEXT: %local = alloca -; CHECK-NEXT: call void @capture(i32* %local) -; CHECK-NEXT: load i32, i32* %local -; CHECK-NEXT: br label %loop -; CHECK: exit: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2, %loop ] -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local -; CHECK-NEXT: %ret = load i32, i32* %local -; CHECK-NEXT: ret i32 %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[THROWAWAY:%.*]] = call i32 @opaque(i32 [[J]]) +; CHECK-NEXT: [[X2]] = call i32 @opaque(i32 [[X21]]) +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* [[LOCAL]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %local = alloca i32 - call void @capture(i32* %local) + call void @capture(i32* %local) br label %loop loop: %j = phi i32 [ 0, %entry ], [ %next, %loop ] %throwaway = call i32 @opaque(i32 %j) - %x = load i32, i32* %local + %x = load i32, i32* %local %x2 = call i32 @opaque(i32 %x) store i32 %x2, i32* %local %next = add i32 %j, 1 @@ -317,28 +418,42 @@ ; dereferenceable define i32 @test9() { ; CHECK-LABEL: @test9( -; CHECK: entry: -; CHECK-NEXT: %local = alloca -; CHECK-NEXT: call void @capture(i32* %local) -; CHECK-NEXT: load i32, i32* %local -; CHECK-NEXT: br label %loop -; CHECK: exit: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ 
%x2, %else ] -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* %local -; CHECK-NEXT: %ret = load i32, i32* %local -; CHECK-NEXT: ret i32 %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[LOCAL_PROMOTED:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[LOCAL_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[ELSE]] ] +; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] +; CHECK: if: +; CHECK-NEXT: br label [[ELSE]] +; CHECK: else: +; CHECK-NEXT: [[X2]] = phi i32 [ 0, [[LOOP]] ], [ [[X21]], [[IF]] ] +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[ELSE]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* [[LOCAL]], align 4 +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[LOCAL]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %local = alloca i32 - call void @capture(i32* %local) + call void @capture(i32* %local) br label %loop loop: - %j = phi i32 [ 0, %entry ], [ %next, %else ] + %j = phi i32 [ 0, %entry ], [ %next, %else ] %j2 = call i32 @opaque(i32 %j) %cmp = icmp eq i32 %j2, 0 br i1 %cmp, label %if, label %else -if: +if: %x = load i32, i32* %local br label %else @@ -356,30 +471,42 @@ define i32 @test9bad(i32 %i) { ; CHECK-LABEL: @test9bad( -; CHECK: entry: -; CHECK-NEXT: %local = alloca -; CHECK-NEXT: call void @capture(i32* %local) -; CHECK-NEXT: %notderef = getelementptr -; CHECK-NEXT: br label %loop -; CHECK: if: -; CHECK-NEXT: load i32, i32* %notderef -; CHECK-NEXT: br label %else -; CHECK: exit: -; CHECK-NEXT: %ret = 
load i32, i32* %notderef -; CHECK-NEXT: ret i32 %ret +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOCAL:%.*]] = alloca i32, align 4 +; CHECK-NEXT: call void @capture(i32* [[LOCAL]]) +; CHECK-NEXT: [[NOTDEREF:%.*]] = getelementptr i32, i32* [[LOCAL]], i32 [[I:%.*]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[ELSE:%.*]] ] +; CHECK-NEXT: [[J2:%.*]] = call i32 @opaque(i32 [[J]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[J2]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF:%.*]], label [[ELSE]] +; CHECK: if: +; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[NOTDEREF]], align 4 +; CHECK-NEXT: br label [[ELSE]] +; CHECK: else: +; CHECK-NEXT: [[X2:%.*]] = phi i32 [ 0, [[LOOP]] ], [ [[X]], [[IF]] ] +; CHECK-NEXT: store i32 [[X2]], i32* [[NOTDEREF]], align 4 +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: [[RET:%.*]] = load i32, i32* [[NOTDEREF]], align 4 +; CHECK-NEXT: ret i32 [[RET]] +; entry: %local = alloca i32 - call void @capture(i32* %local) + call void @capture(i32* %local) %notderef = getelementptr i32, i32* %local, i32 %i br label %loop loop: - %j = phi i32 [ 0, %entry ], [ %next, %else ] + %j = phi i32 [ 0, %entry ], [ %next, %else ] %j2 = call i32 @opaque(i32 %j) %cmp = icmp eq i32 %j2, 0 br i1 %cmp, label %if, label %else -if: +if: %x = load i32, i32* %notderef br label %else @@ -396,12 +523,24 @@ } define void @test10(i32 %i) { +; CHECK-LABEL: @test10( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[X_PROMOTED:%.*]] = load atomic i32, i32* @X unordered, align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 +; CHECK-NEXT: [[NEXT]] = add 
i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] +; CHECK: Out: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] +; CHECK-NEXT: store atomic i32 [[X2_LCSSA]], i32* @X unordered, align 4 +; CHECK-NEXT: ret void +; Entry: br label %Loop -; CHECK-LABEL: @test10( -; CHECK: Entry: -; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4 -; CHECK-NEXT: br label %Loop Loop: ; preds = %Loop, %0 @@ -415,22 +554,37 @@ Out: ret void -; CHECK: Out: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 -; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4 -; CHECK-NEXT: ret void } ; Early exit is known not to be taken on first iteration and thus doesn't ; effect whether load is known to execute. define void @test11(i32 %i) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[X_PROMOTED:%.*]] = load i32, i32* @X, align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: Loop: +; CHECK-NEXT: [[X21:%.*]] = phi i32 [ [[X_PROMOTED]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BODY:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[NEXT:%.*]], [[BODY]] ] +; CHECK-NEXT: [[EARLY_TEST:%.*]] = icmp ult i32 [[J]], 32 +; CHECK-NEXT: br i1 [[EARLY_TEST]], label [[BODY]], label [[EARLY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[X2]] = add i32 [[X21]], 1 +; CHECK-NEXT: [[NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[NEXT]], 0 +; CHECK-NEXT: br i1 [[COND]], label [[OUT:%.*]], label [[LOOP]] +; CHECK: Early: +; CHECK-NEXT: [[X21_LCSSA:%.*]] = phi i32 [ [[X21]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[X21_LCSSA]], i32* @X, align 4 +; CHECK-NEXT: ret void +; CHECK: Out: +; CHECK-NEXT: [[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[BODY]] ] +; CHECK-NEXT: store i32 [[X2_LCSSA]], i32* @X, align 4 +; CHECK-NEXT: ret void +; Entry: br label %Loop -; CHECK-LABEL: @test11( -; CHECK: Entry: -; CHECK-NEXT: load i32, i32* @X -; CHECK-NEXT: br label %Loop Loop: ; preds = 
%Loop, %0 @@ -446,17 +600,9 @@ br i1 %cond, label %Out, label %Loop Early: -; CHECK: Early: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X -; CHECK-NEXT: ret void ret void Out: ret void -; CHECK: Out: -; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2 -; CHECK-NEXT: store i32 %[[LCSSAPHI]], i32* @X -; CHECK-NEXT: ret void }