Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -91,13 +91,14 @@ #define DEBUG_TYPE "gvn" -STATISTIC(NumGVNInstr, "Number of instructions deleted"); -STATISTIC(NumGVNLoad, "Number of loads deleted"); -STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); -STATISTIC(NumGVNBlocks, "Number of blocks merged"); -STATISTIC(NumGVNSimpl, "Number of instructions simplified"); -STATISTIC(NumGVNEqProp, "Number of equalities propagated"); -STATISTIC(NumPRELoad, "Number of loads PRE'd"); +STATISTIC(NumGVNInstr, "Number of instructions deleted"); +STATISTIC(NumGVNLoad, "Number of loads deleted"); +STATISTIC(NumGVNPRE, "Number of instructions PRE'd"); +STATISTIC(NumGVNBlocks, "Number of blocks merged"); +STATISTIC(NumGVNSimpl, "Number of instructions simplified"); +STATISTIC(NumGVNEqProp, "Number of equalities propagated"); +STATISTIC(NumPRELoad, "Number of loads PRE'd"); +STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd"); STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax, "Number of blocks speculated as available in " @@ -1199,8 +1200,39 @@ for (BasicBlock *UnavailableBB : UnavailableBlocks) FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable; + SmallVector BlocksToProcess; + auto *L = LI ? LI->getLoopFor(LoadBB) : nullptr; + // If we are dealing with a load in loop in a block that executes on every + // iteration (such as loop header block), and its clobber does not execute on + // every loop iteration, we can perform PRE of this load to make it only if + // clobber has happened. + bool LoopLoadPRE = !Blockers.count(LoadBB) && L && L->getHeader() == LoadBB && + L->getLoopPreheader() && L->getLoopLatch(); + // Non-invariant pointer won't be available in preheader. + if (LoopLoadPRE) + LoopLoadPRE = L->isLoopInvariant(Load->getPointerOperand()); + // Make sure we are not PRE'ing into inner loops. 
+ // TODO: We could actually, if we had block frequency info. An inner loop may + // still be colder than the header of the outer loop if it is on a cold path. + if (LoopLoadPRE) + LoopLoadPRE = all_of(Blockers, [&](const BasicBlock *Blocker) { + return !L->contains(Blocker) || + (LI->getLoopFor(Blocker) == L && + !DT->dominates(Blocker, L->getLoopLatch())); + }); + + if (LoopLoadPRE) { + for (auto *BB : Blockers) + if (L->contains(BB)) + BlocksToProcess.push_back(BB); + BlocksToProcess.push_back(L->getLoopPreheader()); + } else { + for (auto *BB : predecessors(LoadBB)) + BlocksToProcess.push_back(BB); + } + SmallVector CriticalEdgePred; - for (BasicBlock *Pred : predecessors(LoadBB)) { + for (BasicBlock *Pred : BlocksToProcess) { // If any predecessor block is an EH pad that does not allow non-PHI // instructions before the terminator, we can't PRE the load. if (Pred->getTerminator()->isEHPad()) { @@ -1237,17 +1269,30 @@ return false; } - // Do not split backedge as it will break the canonical loop form. - if (!isLoadPRESplitBackedgeEnabled()) - if (DT->dominates(LoadBB, Pred)) { + // FIXME: Implement critical edge splitting for loop mode. So far, we only + // insert loads in the blocks where the loaded value is invalidated. We can + // support invokes when this is done. + if (LoopLoadPRE) { + if (isa(Pred->getTerminator())) { LLVM_DEBUG( - dbgs() - << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '" - << Pred->getName() << "': " << *Load << '\n'); + dbgs() << "COULD NOT PRE LOAD BECAUSE OF INVOKE CRITICAL EDGE '" + << Pred->getName() << "': " << *Load << '\n'); return false; } - - CriticalEdgePred.push_back(Pred); + PredLoads[Pred] = nullptr; + } else { + // Do not split backedge as it will break the canonical loop form. 
+ if (!isLoadPRESplitBackedgeEnabled()) { + if (DT->dominates(LoadBB, Pred)) { + LLVM_DEBUG( + dbgs() + << "COULD NOT PRE LOAD BECAUSE OF A BACKEDGE CRITICAL EDGE '" + << Pred->getName() << "': " << *Load << '\n'); + return false; + } + } + CriticalEdgePred.push_back(Pred); + } } else { // Only add the predecessors that will not be split for now. PredLoads[Pred] = nullptr; @@ -1263,7 +1308,14 @@ // FIXME: If we could restructure the CFG, we could make a common pred with // all the preds that don't have an available Load and insert a new load into // that one block. - if (NumUnavailablePreds != 1) + // FIXME: For loop mode, we need not limit ourselves by the number of + // in-loop clobbers. If the sum of their frequencies is less than the + // frequency of the header, it is still profitable to do. Unfortunately, + // we don't have frequency info to figure it out. For one in-loop block it is + // easy: the header is not colder than any other loop block. Once we have + // frequency info available, we can allow an unlimited number of in-loop loads. + unsigned MaxUnavailableBlocks = LoopLoadPRE ? 2 : 1; + if (NumUnavailablePreds > MaxUnavailableBlocks) return false; // Now we know where we will insert load. 
We must ensure that it is safe @@ -1437,7 +1489,10 @@ return OptimizationRemark(DEBUG_TYPE, "LoadPRE", Load) << "load eliminated by PRE"; }); - ++NumPRELoad; + if (LoopLoadPRE) + ++NumPRELoopLoad; + else + ++NumPRELoad; return true; } Index: llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll =================================================================== --- llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll +++ llvm/test/Transforms/GVN/PRE/lpre-call-wrap.ll @@ -27,22 +27,25 @@ ; CHECK-NEXT: br i1 [[TMP2]], label [[RETURN:%.*]], label [[BB_NPH:%.*]] ; CHECK: bb.nph: ; CHECK-NEXT: [[TMP3:%.*]] = getelementptr [[STRUCT_A]], %struct.A* [[ITER]], i32 0, i32 1 +; CHECK-NEXT: [[DOTPRE1:%.*]] = load i32, i32* [[TMP3]], align 4 ; CHECK-NEXT: br label [[BB:%.*]] ; CHECK: bb: -; CHECK-NEXT: [[DOTRLE:%.*]] = phi i32 [ [[TMP1]], [[BB_NPH]] ], [ [[TMP7:%.*]], [[BB3_BACKEDGE:%.*]] ] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[DOTRLE]], 1 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[TMP0]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[DOTPRE1]], [[BB_NPH]] ], [ [[TMP8:%.*]], [[BB3_BACKEDGE:%.*]] ] +; CHECK-NEXT: [[DOTRLE:%.*]] = phi i32 [ [[TMP1]], [[BB_NPH]] ], [ [[TMP7:%.*]], [[BB3_BACKEDGE]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[DOTRLE]], 1 +; CHECK-NEXT: store i32 [[TMP5]], i32* [[TMP0]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP6]], label [[BB1:%.*]], label [[BB3_BACKEDGE]] ; CHECK: bb1: ; CHECK-NEXT: tail call void @_Z1gv() -; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[TMP0]], align 4 +; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[TMP3]], align 4 +; CHECK-NEXT: [[DOTPRE2:%.*]] = load i32, i32* [[TMP0]], align 4 ; CHECK-NEXT: br label [[BB3_BACKEDGE]] ; CHECK: bb3.backedge: -; CHECK-NEXT: [[TMP7]] = phi i32 [ [[DOTPRE]], [[BB1]] ], [ [[TMP4]], [[BB]] ] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 
[[TMP7]], 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[RETURN]], label [[BB]] +; CHECK-NEXT: [[TMP7]] = phi i32 [ [[DOTPRE2]], [[BB1]] ], [ [[TMP5]], [[BB]] ] +; CHECK-NEXT: [[TMP8]] = phi i32 [ [[DOTPRE]], [[BB1]] ], [ [[TMP4]], [[BB]] ] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: br i1 [[TMP9]], label [[RETURN]], label [[BB]] ; CHECK: return: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll =================================================================== --- llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll +++ llvm/test/Transforms/GVN/PRE/pre-aliasning-path.ll @@ -7,22 +7,24 @@ declare void @no_side_effect() readonly -; TODO: We can PRE the load into the cold path, removing it from the hot path. define i32 @test_01(i32* %p) { ; CHECK-LABEL: @test_01( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], 100 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] ; CHECK: hot_path: ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: cold_path: ; CHECK-NEXT: call void @side_effect_0() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -54,22 +56,24 @@ ret i32 %x } -; TODO: We can PRE the 
load into the cold path, removing it from the hot path. define i32 @test_02(i32* %p) { ; CHECK-LABEL: @test_02( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], 100 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] ; CHECK: hot_path: ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: cold_path: ; CHECK-NEXT: call void @side_effect_1(i32 [[X]]) +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] Index: llvm/test/Transforms/GVN/PRE/pre-loop-load.ll =================================================================== --- llvm/test/Transforms/GVN/PRE/pre-loop-load.ll +++ llvm/test/Transforms/GVN/PRE/pre-loop-load.ll @@ -6,22 +6,24 @@ declare i32 @personality_function() -; TODO: We can PRE the load away from the hot path. 
define i32 @test_load_on_cold_path(i32* %p) { ; CHECK-LABEL: @test_load_on_cold_path( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] ; CHECK: hot_path: ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: cold_path: ; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -154,18 +156,21 @@ define i32 @test_load_on_exiting_cold_path_01(i32* %p) { ; CHECK-LABEL: @test_load_on_exiting_cold_path_01( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] ; CHECK: hot_path: ; CHECK-NEXT: br label [[BACKEDGE]] ; 
CHECK: cold_path: ; CHECK-NEXT: [[SIDE_COND:%.*]] = call i1 @side_effect_cond() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br i1 [[SIDE_COND]], label [[BACKEDGE]], label [[COLD_EXIT:%.*]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -369,10 +374,11 @@ define i32 @test_load_on_multi_exiting_cold_path(i32* %p) { ; CHECK-LABEL: @test_load_on_multi_exiting_cold_path( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH_1:%.*]] ; CHECK: hot_path: @@ -385,8 +391,10 @@ ; CHECK-NEXT: br i1 [[SIDE_COND_2]], label [[COLD_PATH_3:%.*]], label [[COLD_EXIT]] ; CHECK: cold_path.3: ; CHECK-NEXT: [[SIDE_COND_3:%.*]] = call i1 @side_effect_cond() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br i1 [[SIDE_COND_3]], label [[BACKEDGE]], label [[COLD_EXIT]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH_3]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -572,10 +580,11 @@ define i32 @test_side_exit_after_merge(i32* %p) { ; CHECK-LABEL: 
@test_side_exit_after_merge( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[X]], 0 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] ; CHECK: hot_path: @@ -585,11 +594,14 @@ ; CHECK-NEXT: br i1 [[COND_1]], label [[DO_CALL:%.*]], label [[SIDE_EXITING:%.*]] ; CHECK: do_call: ; CHECK-NEXT: [[SIDE_COND:%.*]] = call i1 @side_effect_cond() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[SIDE_EXITING]] ; CHECK: side_exiting: +; CHECK-NEXT: [[X3:%.*]] = phi i32 [ [[X_PRE]], [[DO_CALL]] ], [ 0, [[COLD_PATH]] ] ; CHECK-NEXT: [[SIDE_COND_PHI:%.*]] = phi i1 [ [[SIDE_COND]], [[DO_CALL]] ], [ true, [[COLD_PATH]] ] ; CHECK-NEXT: br i1 [[SIDE_COND_PHI]], label [[BACKEDGE]], label [[COLD_EXIT:%.*]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X3]], [[SIDE_EXITING]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]] @@ -689,11 +701,12 @@ define i32 @test_guard_2(i32* %p, i32 %g) { ; CHECK-LABEL: @test_guard_2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[X_PRE1:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[X_PRE1]], [[ENTRY:%.*]] ], [ [[X2:%.*]], [[BACKEDGE:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, 
[[ENTRY]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE]] ] ; CHECK-NEXT: [[GUARD_COND:%.*]] = icmp ne i32 [[IV]], [[G:%.*]] -; CHECK-NEXT: [[X:%.*]] = load i32, i32* [[P:%.*]], align 4 ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[GUARD_COND]]) [ "deopt"() ] ; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], 100 ; CHECK-NEXT: br i1 [[COND]], label [[HOT_PATH:%.*]], label [[COLD_PATH:%.*]] @@ -701,8 +714,10 @@ ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: cold_path: ; CHECK-NEXT: call void @side_effect() +; CHECK-NEXT: [[X_PRE:%.*]] = load i32, i32* [[P]], align 4 ; CHECK-NEXT: br label [[BACKEDGE]] ; CHECK: backedge: +; CHECK-NEXT: [[X2]] = phi i32 [ [[X_PRE]], [[COLD_PATH]] ], [ [[X]], [[HOT_PATH]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[X]] ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 1000 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]