Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -328,6 +328,9 @@
   bool PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
                       UnavailBlkVect &UnavailableBlocks);
 
+  bool PerformLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
+                          UnavailBlkVect &UnavailableBlocks);
+
   /// Eliminates partially redundant \p Load, replacing it with \p
   /// AvailableLoads (connected by Phis if needed).
   void eliminatePartiallyRedundantLoad(
Index: llvm/lib/Transforms/Scalar/GVN.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/GVN.cpp
+++ llvm/lib/Transforms/Scalar/GVN.cpp
@@ -91,13 +91,15 @@
 
 #define DEBUG_TYPE "gvn"
 
-STATISTIC(NumGVNInstr, "Number of instructions deleted");
-STATISTIC(NumGVNLoad, "Number of loads deleted");
-STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
-STATISTIC(NumGVNBlocks, "Number of blocks merged");
-STATISTIC(NumGVNSimpl, "Number of instructions simplified");
-STATISTIC(NumGVNEqProp, "Number of equalities propagated");
-STATISTIC(NumPRELoad, "Number of loads PRE'd");
+STATISTIC(NumGVNInstr, "Number of instructions deleted");
+STATISTIC(NumGVNLoad, "Number of loads deleted");
+STATISTIC(NumGVNPRE, "Number of instructions PRE'd");
+STATISTIC(NumGVNBlocks, "Number of blocks merged");
+STATISTIC(NumGVNSimpl, "Number of instructions simplified");
+STATISTIC(NumGVNEqProp, "Number of equalities propagated");
+STATISTIC(NumPRELoad, "Number of loads PRE'd");
+STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
+
 STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
           "Number of blocks speculated as available in "
           "IsValueFullyAvailableInBlock(), max");
@@ -1447,6 +1449,61 @@
   return true;
 }
 
+bool GVN::PerformLoopLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
+                             UnavailBlkVect &UnavailableBlocks) {
+  if (!LI)
+    return false;
+
+  const Loop *L = LI->getLoopFor(Load->getParent());
+  // TODO: Generalize to other loop blocks that dominate the latch.
+  if (!L || L->getHeader() != Load->getParent())
+    return false;
+
+  BasicBlock *Preheader = L->getLoopPreheader();
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Preheader || !Latch)
+    return false;
+
+  // Bail out if we can side-exit before the load is executed.
+  if (ICF->isDominatedByICFIFromSameBlock(Load))
+    return false;
+
+  unsigned UnavailableLoopBlocks = 0;
+  for (auto *Blocker : UnavailableBlocks) {
+    // Blockers from outside the loop are handled in the preheader.
+    if (!L->contains(Blocker))
+      continue;
+    // Do not sink into inner loops.
+    if (L != LI->getLoopFor(Blocker))
+      return false;
+
+    UnavailableLoopBlocks++;
+    // So far, only PRE into blocks that are not must-execute.
+    if (DT->dominates(Blocker, Latch)) {
+      return false;
+    }
+
+    if (!isa<BranchInst>(Blocker->getTerminator()))
+      return false;
+  }
+
+  // TODO: We could allow more blocks if we had block frequency info here.
+  if (UnavailableLoopBlocks != 1)
+    return false;
+
+  // TODO: Support critical edge splitting if blocker has more than 1 successor.
+  Value *LoadPtr = Load->getPointerOperand();
+  MapVector<BasicBlock *, Value *> AvailableLoads;
+  for (auto *Blocker : UnavailableBlocks)
+    AvailableLoads[Blocker] = LoadPtr;
+  AvailableLoads[Preheader] = LoadPtr;
+
+  LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
+  eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads);
+  ++NumPRELoopLoad;
+  return true;
+}
+
 static void reportLoadElim(LoadInst *Load, Value *AvailableValue,
                            OptimizationRemarkEmitter *ORE) {
   using namespace ore;
@@ -1544,7 +1601,7 @@
   if (!isLoadInLoopPREEnabled() && LI && LI->getLoopFor(Load->getParent()))
     return Changed;
 
-  return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
+  return Changed || PerformLoadPRE(Load, ValuesPerBlock, UnavailableBlocks) || PerformLoopLoadPRE(Load, ValuesPerBlock, UnavailableBlocks);
 }
 
 static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
Index: llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll
+++ llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll
@@ -27,22 +27,25 @@
 ; CHECK-NEXT:    br i1 [[TMP2]], label [[RETURN:%.*]], label [[BB_NPH:%.*]]
 ; CHECK:       bb.nph:
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_A]], %struct.A* [[ITER]], i32 0, i32 1
+; CHECK-NEXT:    [[DOTPRE1:%.*]] = load i32, i32* [[TMP3]], align 4
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb:
-; CHECK-NEXT:    [[DOTRLE:%.*]] = phi i32 [ [[TMP1]], [[BB_NPH]] ], [ [[TMP7:%.*]], [[BB3_BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[DOTRLE]], 1
-; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[DOTPRE1]], [[BB_NPH]] ], [ [[TMP8:%.*]], [[BB3_BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[DOTRLE:%.*]] = phi i32 [ [[TMP1]], [[BB_NPH]] ], [ [[TMP7:%.*]], [[BB3_BACKEDGE]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[DOTRLE]], 1
+; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], [[TMP4]]
 ; CHECK-NEXT:    br i1 [[TMP6]], label [[BB1:%.*]], label [[BB3_BACKEDGE]]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    tail call void @_Z1gv()
-; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[DOTPRE2:%.*]] = load i32, i32* [[TMP0]], align 4
 ; CHECK-NEXT:    br label [[BB3_BACKEDGE]]
 ; CHECK:       bb3.backedge:
-; CHECK-NEXT:    [[TMP7]] = phi i32 [ [[DOTPRE]], [[BB1]] ], [ [[TMP4]], [[BB]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-; CHECK-NEXT:    br i1 [[TMP8]], label [[RETURN]], label [[BB]]
+; CHECK-NEXT:    [[TMP7]] = phi i32 [ [[DOTPRE2]], [[BB1]] ], [ [[TMP5]], [[BB]] ]
+; CHECK-NEXT:    [[TMP8]] = phi i32 [ [[DOTPRE]], [[BB1]] ], [ [[TMP4]], [[BB]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP7]], 0
+; CHECK-NEXT:    br i1 [[TMP9]], label [[RETURN]], label [[BB]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret void
 ;
Index: llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll
+++ llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll
@@ -7,22 +7,24 @@
 
 declare void @no_side_effect() readonly
 
-; TODO: We can PRE the load into the cold path, removing it from the hot path.
 define i32 @test_01(i32* %p) {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[X]], 100
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
 ; CHECK:       hot_path:
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       cold_path:
 ; CHECK-NEXT:    call void @side_effect_0()
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -54,22 +56,24 @@
   ret i32 %x
 }
 
-; TODO: We can PRE the load into the cold path, removing it from the hot path.
 define i32 @test_02(i32* %p) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[X]], 100
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
 ; CHECK:       hot_path:
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       cold_path:
 ; CHECK-NEXT:    call void @side_effect_1(i32 [[X]])
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
Index: llvm/test/Transforms/GVN/PRE/pre-loop-load.ll
===================================================================
--- llvm/test/Transforms/GVN/PRE/pre-loop-load.ll
+++ llvm/test/Transforms/GVN/PRE/pre-loop-load.ll
@@ -6,22 +6,24 @@
 
 declare i32 @personality_function()
 
-; TODO: We can PRE the load away from the hot path.
 define i32 @test_load_on_cold_path(i32* %p) {
 ; CHECK-LABEL: @test_load_on_cold_path(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[X]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
 ; CHECK:       hot_path:
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       cold_path:
 ; CHECK-NEXT:    call void @side_effect()
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE1]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -154,18 +156,21 @@
 define i32 @test_load_on_exiting_cold_path_01(i32* %p) {
 ; CHECK-LABEL: @test_load_on_exiting_cold_path_01(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[X]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
 ; CHECK:       hot_path:
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       cold_path:
 ; CHECK-NEXT:    [[SIDE_COND:%.*]] = call i1 @side_effect_cond()
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br i1 [[SIDE_COND]], label [[BACKEDGE]], label [[COLD_EXIT:%.*]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE1]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -369,10 +374,11 @@
 define i32 @test_load_on_multi_exiting_cold_path(i32* %p) {
 ; CHECK-LABEL: @test_load_on_multi_exiting_cold_path(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[X]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH_1:%.*]]
 ; CHECK:       hot_path:
@@ -385,8 +391,10 @@
 ; CHECK-NEXT:    br i1 [[SIDE_COND_2]], label [[COLD_PATH_3:%.*]], label [[COLD_EXIT]]
 ; CHECK:       cold_path.3:
 ; CHECK-NEXT:    [[SIDE_COND_3:%.*]] = call i1 @side_effect_cond()
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br i1 [[SIDE_COND_3]], label [[BACKEDGE]], label [[COLD_EXIT]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE1]], [[COLD_PATH_3]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -572,10 +580,11 @@
 define i32 @test_side_exit_after_merge(i32* %p) {
 ; CHECK-LABEL: @test_side_exit_after_merge(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ne i32 [[X]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
 ; CHECK:       hot_path:
@@ -585,11 +594,14 @@
 ; CHECK-NEXT:    br i1 [[COND_1]], label [[DO_CALL:%.*]], label [[SIDE_EXITING:%.*]]
 ; CHECK:       do_call:
 ; CHECK-NEXT:    [[SIDE_COND:%.*]] = call i1 @side_effect_cond()
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[SIDE_EXITING]]
 ; CHECK:       side_exiting:
+; CHECK-NEXT:    [[X3:%.*]] = phi i32 [ [[X_PRE1]], [[DO_CALL]] ], [ 0, [[COLD_PATH]] ]
 ; CHECK-NEXT:    [[SIDE_COND_PHI:%.*]] = phi i1 [ [[SIDE_COND]], [[DO_CALL]] ], [ true, [[COLD_PATH]] ]
 ; CHECK-NEXT:    br i1 [[SIDE_COND_PHI]], label [[BACKEDGE]], label [[COLD_EXIT:%.*]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X3]], [[SIDE_EXITING]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
@@ -689,11 +701,12 @@
 define i32 @test_guard_2(i32* %p, i32 %g) {
 ; CHECK-LABEL: @test_guard_2(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X_PRE:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[X_PRE]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ]
 ; CHECK-NEXT:    [[GUARD_COND:%.*]] = icmp ne i32 [[IV]], [[G:%.*]]
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P:%.*]], align 4
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[GUARD_COND]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp ult i32 [[X]], 100
 ; CHECK-NEXT:    br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]]
@@ -701,8 +714,10 @@
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       cold_path:
 ; CHECK-NEXT:    call void @side_effect()
+; CHECK-NEXT:    [[X_PRE1:%.*]] = load i32, i32* [[P]], align 4
 ; CHECK-NEXT:    br label [[BACKEDGE]]
 ; CHECK:       backedge:
+; CHECK-NEXT:    [[X2]] = phi i32 [ [[X_PRE1]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], [[X]]
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]