diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -77,32 +77,7 @@ // Check whether we are capable of peeling this loop. bool llvm::canPeel(Loop *L) { // Make sure the loop is in simplified form - if (!L->isLoopSimplifyForm()) - return false; - - // Don't try to peel loops where the latch is not the exiting block. - // This can be an indication of two different things: - // 1) The loop is not rotated. - // 2) The loop contains irreducible control flow that involves the latch. - const BasicBlock *Latch = L->getLoopLatch(); - if (!L->isLoopExiting(Latch)) - return false; - - // Peeling is only supported if the latch is a branch. - if (!isa(Latch->getTerminator())) - return false; - - SmallVector Exits; - L->getUniqueNonLatchExitBlocks(Exits); - // The latch must either be the only exiting block or all non-latch exit - // blocks have either a deopt or unreachable terminator or compose a chain of - // blocks where the last one is either deopt or unreachable terminated. Both - // deopt and unreachable terminators are a strong indication they are not - // taken. Note that this is a profitability check, not a legality check. Also - // note that LoopPeeling currently can only update the branch weights of latch - // blocks and branch weights to blocks with deopt or unreachable do not need - // updating. - return llvm::all_of(Exits, IsBlockFollowedByDeoptOrUnreachable); + return L->isLoopSimplifyForm(); } // This function calculates the number of iterations after which the given Phi @@ -487,82 +462,87 @@ } } -/// Update the branch weights of the latch of a peeled-off loop +struct WeightInfo { + // Weights for current iteration. + SmallVector Weights; + // Weights to subtract after each iteration. + const SmallVector SubWeights; +}; + +/// Update the branch weights of an exiting block of a peeled-off loop /// iteration. 
-/// This sets the branch weights for the latch of the recently peeled off loop -/// iteration correctly. -/// Let F is a weight of the edge from latch to header. -/// Let E is a weight of the edge from latch to exit. +/// Let F be the weight of the edge to continue (fallthrough) into the loop. +/// Let E be the weight of the edge to an exit. /// F/(F+E) is a probability to go to loop and E/(F+E) is a probability to /// go to exit. -/// Then, Estimated TripCount = F / E. +/// Then, Estimated ExitCount = F / E. -/// For I-th (counting from 0) peeled off iteration we set the the weights for -/// the peeled latch as (TC - I, 1). It gives us reasonable distribution, -/// The probability to go to exit 1/(TC-I) increases. At the same time -/// the estimated trip count of remaining loop reduces by I. -/// To avoid dealing with division rounding we can just multiple both part -/// of weights to E and use weight as (F - I * E, E). +/// For the I-th (counting from 0) peeled-off iteration we set the weights for +/// the peeled exit as (EC - I, 1). This gives us a reasonable distribution: +/// the probability to go to exit, 1/(EC-I), increases. At the same time, +/// the estimated exit count in the remainder loop is reduced by I. +/// To avoid dealing with division rounding, we can just multiply both parts +/// of the weights by E and use the weights (F - I * E, E). -/// -/// \param Header The copy of the header block that belongs to next iteration. -/// \param LatchBR The copy of the latch branch that belongs to this iteration. -/// \param[in,out] FallThroughWeight The weight of the edge from latch to -/// header before peeling (in) and after peeled off one iteration (out). -static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, - uint64_t &FallThroughWeight) { - // FallThroughWeight is 0 means that there is no branch weights on original - // latch block or estimated trip count is zero. - if (!FallThroughWeight) - return; - - unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1); - MDBuilder MDB(LatchBR->getContext()); - MDNode *WeightNode = - HeaderIdx ? 
MDB.createBranchWeights(ExitWeight, FallThroughWeight) - : MDB.createBranchWeights(FallThroughWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); - FallThroughWeight = - FallThroughWeight > ExitWeight ? FallThroughWeight - ExitWeight : 1; +static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { + MDBuilder MDB(Term->getContext()); + Term->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Info.Weights)); + for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) + if (SubWeight != 0) + Info.Weights[Idx] = Info.Weights[Idx] > SubWeight + ? Info.Weights[Idx] - SubWeight + : 1; } -/// Initialize the weights. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param[out] ExitWeight The weight of the edge from Latch to Exit. -/// \param[out] FallThroughWeight The weight of the edge from Latch to Header. -static void initBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t &ExitWeight, - uint64_t &FallThroughWeight) { - uint64_t TrueWeight, FalseWeight; - if (!extractBranchWeights(*LatchBR, TrueWeight, FalseWeight)) - return; - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - ExitWeight = HeaderIdx ? TrueWeight : FalseWeight; - FallThroughWeight = HeaderIdx ? FalseWeight : TrueWeight; -} +/// Initialize the weights for all exiting blocks. +static void initBranchWeights(DenseMap &WeightInfos, + Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) { + Instruction *Term = ExitingBlock->getTerminator(); + SmallVector Weights; + if (!extractBranchWeights(*Term, Weights)) + continue; -/// Update the weights of original Latch block after peeling off all iterations. -/// -/// \param Header The header block. -/// \param LatchBR The latch branch. -/// \param ExitWeight The weight of the edge from Latch to Exit. -/// \param FallThroughWeight The weight of the edge from Latch to Header. 
-static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR, - uint64_t ExitWeight, - uint64_t FallThroughWeight) { - // FallThroughWeight is 0 means that there is no branch weights on original - // latch block or estimated trip count is zero. - if (!FallThroughWeight) - return; + // See the comment on updateBranchWeights() for an explanation of what we + // do here. + uint32_t FallThroughWeights = 0; + uint32_t ExitWeights = 0; + for (auto [Succ, Weight] : zip(successors(Term), Weights)) { + if (L->contains(Succ)) + FallThroughWeights += Weight; + else + ExitWeights += Weight; + } + + // Don't try to update weights for the degenerate case. + if (FallThroughWeights == 0) + continue; - // Sets the branch weights on the loop exit. - MDBuilder MDB(LatchBR->getContext()); - unsigned HeaderIdx = LatchBR->getSuccessor(0) == Header ? 0 : 1; - MDNode *WeightNode = - HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThroughWeight) - : MDB.createBranchWeights(FallThroughWeight, ExitWeight); - LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode); + SmallVector SubWeights; + for (auto [Succ, Weight] : zip(successors(Term), Weights)) { + if (!L->contains(Succ)) { + // Exit weights stay the same. + SubWeights.push_back(0); + continue; + } + + // Subtract exit weights on each iteration, distributed across all + // fallthrough edges. + double W = (double)Weight / (double)FallThroughWeights; + SubWeights.push_back((uint32_t)(ExitWeights * W)); + } + + WeightInfos.insert({Term, {std::move(Weights), std::move(SubWeights)}}); + } +} + +/// Update the weights of the original exiting block after peeling off all +/// iterations. 
+static void fixupBranchWeights(Instruction *Term, const WeightInfo &Info) { + MDBuilder MDB(Term->getContext()); + Term->setMetadata(LLVMContext::MD_prof, + MDB.createBranchWeights(Info.Weights)); } /// Clones the body of the loop L, putting it between \p InsertTop and \p @@ -644,10 +624,10 @@ // header (for the last peeled iteration) or the copied header of the next // iteration (for every other iteration) BasicBlock *NewLatch = cast(VMap[Latch]); - BranchInst *LatchBR = cast(NewLatch->getTerminator()); - for (unsigned idx = 0, e = LatchBR->getNumSuccessors(); idx < e; ++idx) - if (LatchBR->getSuccessor(idx) == Header) { - LatchBR->setSuccessor(idx, InsertBot); + auto *LatchTerm = cast(NewLatch->getTerminator()); + for (unsigned idx = 0, e = LatchTerm->getNumSuccessors(); idx < e; ++idx) + if (LatchTerm->getSuccessor(idx) == Header) { + LatchTerm->setSuccessor(idx, InsertBot); break; } if (DT) @@ -835,12 +815,13 @@ ValueToValueMapTy LVMap; + Instruction *LatchTerm = + cast(cast(Latch)->getTerminator()); + // If we have branch weight information, we'll want to update it for the // newly created branches. - BranchInst *LatchBR = - cast(cast(Latch)->getTerminator()); - uint64_t ExitWeight = 0, FallThroughWeight = 0; - initBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); + DenseMap Weights; + initBranchWeights(Weights, L); // Identify what noalias metadata is inside the loop: if it is inside the // loop, the associated metadata must be cloned for each iteration. @@ -869,11 +850,15 @@ assert(DT.verify(DominatorTree::VerificationLevel::Fast)); #endif - auto *LatchBRCopy = cast(VMap[LatchBR]); - updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight); + for (auto &[Term, Info] : Weights) { + auto *TermCopy = cast(VMap[Term]); + updateBranchWeights(TermCopy, Info); + } + // Remove Loop metadata from the latch branch instruction // because it is not the Loop's latch branch anymore. 
- LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr); + auto *LatchTermCopy = cast(VMap[LatchTerm]); + LatchTermCopy->setMetadata(LLVMContext::MD_loop, nullptr); InsertTop = InsertBot; InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DT, LI); @@ -896,7 +881,8 @@ PHI->setIncomingValueForBlock(NewPreHeader, NewVal); } - fixupBranchWeights(Header, LatchBR, ExitWeight, FallThroughWeight); + for (const auto &[Term, Info] : Weights) + fixupBranchWeights(Term, Info); // Update Metadata for count of peeled off iterations. unsigned AlreadyPeeled = 0; diff --git a/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll b/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-branch-weights.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals +; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=2 2>&1 | FileCheck %s + +declare i32 @get.x() + +; Test branch weight update for terminator with multiple fallthrough and +; multiple exit edges. 
+define void @test() { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] +; CHECK: loop.peel.begin: +; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] +; CHECK: loop.peel: +; CHECK-NEXT: [[X_PEEL:%.*]] = call i32 @get.x() +; CHECK-NEXT: switch i32 [[X_PEEL]], label [[LOOP_LATCH_PEEL:%.*]] [ +; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL]] +; CHECK-NEXT: i32 1, label [[LOOP_EXIT:%.*]] +; CHECK-NEXT: i32 2, label [[LOOP_EXIT]] +; CHECK-NEXT: ], !prof [[PROF0:![0-9]+]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT:%.*]] +; CHECK: loop.peel.next: +; CHECK-NEXT: br label [[LOOP_PEEL2:%.*]] +; CHECK: loop.peel2: +; CHECK-NEXT: [[X_PEEL3:%.*]] = call i32 @get.x() +; CHECK-NEXT: switch i32 [[X_PEEL3]], label [[LOOP_LATCH_PEEL4:%.*]] [ +; CHECK-NEXT: i32 0, label [[LOOP_LATCH_PEEL4]] +; CHECK-NEXT: i32 1, label [[LOOP_EXIT]] +; CHECK-NEXT: i32 2, label [[LOOP_EXIT]] +; CHECK-NEXT: ], !prof [[PROF1:![0-9]+]] +; CHECK: loop.latch.peel4: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] +; CHECK: loop.peel.next1: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT5:%.*]] +; CHECK: loop.peel.next5: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[X:%.*]] = call i32 @get.x() +; CHECK-NEXT: switch i32 [[X]], label [[LOOP_LATCH:%.*]] [ +; CHECK-NEXT: i32 0, label [[LOOP_LATCH]] +; CHECK-NEXT: i32 1, label [[LOOP_EXIT_LOOPEXIT:%.*]] +; CHECK-NEXT: i32 2, label [[LOOP_EXIT_LOOPEXIT]] +; CHECK-NEXT: ], !prof [[PROF2:![0-9]+]] +; CHECK: loop.latch: +; CHECK-NEXT: br label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: loop.exit.loopexit: +; CHECK-NEXT: br label [[LOOP_EXIT]] +; CHECK: loop.exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %x = call i32 @get.x() + switch i32 %x, label %loop.latch [ + i32 0, label %loop.latch + i32 1, label %loop.exit + i32 2, label %loop.exit + ], !prof !0 + +loop.latch: + br label %loop + 
+loop.exit: + ret void +} + +!0 = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10} + +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 100, i32 200, i32 20, i32 10} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 90, i32 180, i32 20, i32 10} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 80, i32 160, i32 20, i32 10} +; CHECK: [[LOOP3]] = distinct !{!3, !4, !5} +; CHECK: [[META4:![0-9]+]] = !{!"llvm.loop.peeled.count", i32 2} +; CHECK: [[META5:![0-9]+]] = !{!"llvm.loop.unroll.disable"} +;. diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -1140,20 +1140,33 @@ ret void } -; Invoke is not a conditional branch that we can optimize, -; so this shouldn't be peeled at all. This is a reproducer -; for a bug where evaluating the loop would fail an assertion. define void @test17() personality i8* undef{ ; CHECK-LABEL: @test17( ; CHECK-NEXT: body: +; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] +; CHECK: loop.peel.begin: +; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] +; CHECK: loop.peel: +; CHECK-NEXT: invoke void @f1() +; CHECK-NEXT: to label [[LOOP_PEEL_NEXT:%.*]] unwind label [[EH_UNW_LOOPEXIT_LOOPEXIT_SPLIT_LP:%.*]] +; CHECK: loop.peel.next: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] +; CHECK: loop.peel.next1: +; CHECK-NEXT: br label [[BODY_PEEL_NEWPH:%.*]] +; CHECK: body.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[CONST:%.*]] = phi i64 [ -33, [[LOOP]] ], [ -20, [[BODY:%.*]] ] ; CHECK-NEXT: invoke void @f1() -; CHECK-NEXT: to label [[LOOP]] unwind label [[EH_UNW_LOOPEXIT:%.*]] -; CHECK: eh.Unw.loopexit: -; CHECK-NEXT: [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: to label [[LOOP]] unwind label [[EH_UNW_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: eh.Unw.loopexit.loopexit: +; CHECK-NEXT: 
[[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* null +; CHECK-NEXT: br label [[EH_UNW_LOOPEXIT:%.*]] +; CHECK: eh.Unw.loopexit.loopexit.split-lp: +; CHECK-NEXT: [[LPAD_LOOPEXIT_SPLIT_LP:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null +; CHECK-NEXT: br label [[EH_UNW_LOOPEXIT]] +; CHECK: eh.Unw.loopexit: ; CHECK-NEXT: ret void ; body: @@ -1170,20 +1183,35 @@ ret void } -; Testcase reduced from PR48812. We expect no peeling -; because the latch terminator is a switch. +; Testcase reduced from PR48812. define void @test18(i32* %p) { ; CHECK-LABEL: @test18( ; CHECK-NEXT: init: +; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] +; CHECK: loop.peel.begin: +; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] +; CHECK: loop.peel: +; CHECK-NEXT: br label [[LATCH_PEEL:%.*]] +; CHECK: latch.peel: +; CHECK-NEXT: [[CONTROL_PEEL:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 +; CHECK-NEXT: switch i32 [[CONTROL_PEEL]], label [[EXIT:%.*]] [ +; CHECK-NEXT: i32 2, label [[LOOP_PEEL_NEXT:%.*]] +; CHECK-NEXT: ] +; CHECK: loop.peel.next: +; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] +; CHECK: loop.peel.next1: +; CHECK-NEXT: br label [[INIT_PEEL_NEWPH:%.*]] +; CHECK: init.peel.newph: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[CONST:%.*]] = phi i32 [ 40, [[INIT:%.*]] ], [ 0, [[LATCH:%.*]] ] -; CHECK-NEXT: br label [[LATCH]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[CONTROL:%.*]] = load volatile i32, i32* [[P:%.*]], align 4 -; CHECK-NEXT: switch i32 [[CONTROL]], label [[EXIT:%.*]] [ +; CHECK-NEXT: [[CONTROL:%.*]] = load volatile i32, i32* [[P]], align 4 +; CHECK-NEXT: switch i32 [[CONTROL]], label [[EXIT_LOOPEXIT:%.*]] [ ; CHECK-NEXT: i32 2, label [[LOOP]] -; CHECK-NEXT: ] +; CHECK-NEXT: ], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-irreducible.ll 
b/llvm/test/Transforms/LoopUnroll/peel-loop-irreducible.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-irreducible.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-irreducible.ll @@ -1,18 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=1 | FileCheck %s -; Check we don't peel loops where the latch is not the exiting block. -; CHECK-LABEL: @invariant_backedge_irreducible -; CHECK: entry: -; CHECK: br label %header -; CHECK-NOT: peel -; CHECK: header: -; CHECK: br i1 {{.*}} label %latch, label %exiting -; CHECK: latch: -; CHECK: br i1 {{.*}} label %header, label %exiting -; CHECK: exiting: -; CHECK: br i1 {{.*}} label %latch, label %exit - define i32 @invariant_backedge_irreducible(i32 %a, i32 %b) { +; CHECK-LABEL: @invariant_backedge_irreducible( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[HEADER_PEEL_BEGIN:%.*]] +; CHECK: header.peel.begin: +; CHECK-NEXT: br label [[HEADER_PEEL:%.*]] +; CHECK: header.peel: +; CHECK-NEXT: br i1 false, label [[LATCH_PEEL:%.*]], label [[EXITING_PEEL:%.*]] +; CHECK: latch.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 0, 1000 +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[HEADER_PEEL_NEXT:%.*]], label [[EXITING_PEEL]] +; CHECK: exiting.peel: +; CHECK-NEXT: [[CMP_EXITING_PEEL:%.*]] = phi i1 [ false, [[HEADER_PEEL]] ], [ [[CMP_PEEL]], [[LATCH_PEEL]] ] +; CHECK-NEXT: br i1 [[CMP_EXITING_PEEL]], label [[LATCH_PEEL]], label [[EXIT:%.*]] +; CHECK: header.peel.next: +; CHECK-NEXT: br label [[HEADER_PEEL_NEXT1:%.*]] +; CHECK: header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[LATCH:%.*]] ] +; CHECK-NEXT: [[CMP_PHI:%.*]] = phi i1 [ [[CMP_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[CMP:%.*]], [[LATCH]] ] +; 
CHECK-NEXT: br i1 [[CMP_PHI]], label [[LATCH]], label [[EXITING:%.*]] +; CHECK: latch: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I]], 1 +; CHECK-NEXT: [[CMP]] = icmp slt i32 [[I]], 1000 +; CHECK-NEXT: br i1 [[CMP]], label [[HEADER]], label [[EXITING]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: exiting: +; CHECK-NEXT: [[CMP_EXITING:%.*]] = phi i1 [ [[CMP_PHI]], [[HEADER]] ], [ [[CMP]], [[LATCH]] ] +; CHECK-NEXT: br i1 [[CMP_EXITING]], label [[LATCH]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; entry: br label %header diff --git a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll --- a/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-multiple-unreachable-exits.ll @@ -134,26 +134,52 @@ define void @peel_unreachable_and_multiple_reachable_exits(i32* %ptr, i32 %N, i32 %x) { ; CHECK-LABEL: @peel_unreachable_and_multiple_reachable_exits( ; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_BEGIN:%.*]] +; CHECK: loop.header.peel.begin: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL:%.*]] +; CHECK: loop.header.peel: +; CHECK-NEXT: [[C_PEEL:%.*]] = icmp ult i32 1, 2 +; CHECK-NEXT: br i1 [[C_PEEL]], label [[THEN_PEEL:%.*]], label [[ELSE_PEEL:%.*]] +; CHECK: else.peel: +; CHECK-NEXT: [[C_3_PEEL:%.*]] = icmp eq i32 1, [[X:%.*]] +; CHECK-NEXT: br i1 [[C_3_PEEL]], label [[UNREACHABLE_EXIT:%.*]], label [[LOOP_LATCH_PEEL:%.*]] +; CHECK: then.peel: +; CHECK-NEXT: [[C_2_PEEL:%.*]] = icmp sgt i32 1, [[X]] +; CHECK-NEXT: br i1 [[C_2_PEEL]], label [[EXIT:%.*]], label [[LOOP_LATCH_PEEL]] +; CHECK: loop.latch.peel: +; CHECK-NEXT: [[M_PEEL:%.*]] = phi i32 [ 0, [[THEN_PEEL]] ], [ [[X]], [[ELSE_PEEL]] ] +; CHECK-NEXT: [[GEP_PEEL:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 1 +; CHECK-NEXT: store i32 [[M_PEEL]], i32* [[GEP_PEEL]], align 4 +; CHECK-NEXT: 
[[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 1, 1 +; CHECK-NEXT: [[C_4_PEEL:%.*]] = icmp ult i32 1, 1000 +; CHECK-NEXT: br i1 [[C_4_PEEL]], label [[LOOP_HEADER_PEEL_NEXT:%.*]], label [[EXIT]] +; CHECK: loop.header.peel.next: +; CHECK-NEXT: br label [[LOOP_HEADER_PEEL_NEXT1:%.*]] +; CHECK: loop.header.peel.next1: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], 2 -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 false, label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[C_2:%.*]] = icmp sgt i32 [[IV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[C_2]], label [[EXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: else: ; CHECK-NEXT: [[C_3:%.*]] = icmp eq i32 [[IV]], [[X]] -; CHECK-NEXT: br i1 [[C_3]], label [[UNREACHABLE_EXIT:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 [[C_3]], label [[UNREACHABLE_EXIT_LOOPEXIT:%.*]], label [[LOOP_LATCH]] ; CHECK: loop.latch: ; CHECK-NEXT: [[M:%.*]] = phi i32 [ 0, [[THEN]] ], [ [[X]], [[ELSE]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i32 [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[IV]] ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_4:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_4]], label [[LOOP_HEADER]], label [[EXIT]] +; CHECK-NEXT: br i1 [[C_4]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: ret void +; 
CHECK: unreachable.exit.loopexit: +; CHECK-NEXT: br label [[UNREACHABLE_EXIT]] ; CHECK: unreachable.exit: ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: unreachable @@ -231,7 +257,7 @@ ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -369,7 +395,7 @@ ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[C:%.*]] = icmp ult i32 [[IV]], [[N:%.*]] -; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]], !prof [[PROF3:![0-9]+]] +; CHECK-NEXT: br i1 [[C]], label [[THEN:%.*]], label [[ELSE:%.*]], !prof [[PROF5:![0-9]+]] ; CHECK: then: ; CHECK-NEXT: br i1 [[C_1:%.*]], label [[EXIT_1:%.*]], label [[LOOP_LATCH]] ; CHECK: else: @@ -381,7 +407,7 @@ ; CHECK-NEXT: store i32 [[M]], i32* [[GEP]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[C_3:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !prof [[PROF3]] +; CHECK-NEXT: br i1 [[C_3]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !prof [[PROF5]] ; CHECK: exit: ; CHECK-NEXT: ret void ; CHECK: exit.1: