Index: llvm/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -73,7 +73,6 @@ bool PreserveCondBr; bool PreserveOnlyFirst; unsigned TripMultiple; - unsigned PeelCount; bool UnrollRemainder; bool ForgetAllSCEV; }; Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1158,6 +1158,32 @@ if (TripCount && UP.Count > TripCount) UP.Count = TripCount; + if (PP.PeelCount) { + if (UP.Count != 1) { + report_fatal_error("Cannot specify both explicit peel count and " + "explicit unroll count"); + } + + LLVM_DEBUG(dbgs() << "PEELING loop %" << L->getHeader()->getName() + << " with iteration count " << PP.PeelCount << "!\n"); + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), + L->getHeader()) + << " peeled loop by " << ore::NV("PeelCount", PP.PeelCount) + << " iterations"; + }); + + if (peelLoop(L, PP.PeelCount, LI, &SE, &DT, &AC, PreserveLCSSA)) { + simplifyLoopAfterUnroll(L, true, LI, &SE, &DT, &AC, &TTI); + // If the loop was peeled, we already "used up" the profile information + // we had, so we don't want to unroll or peel again. + if (PP.PeelProfiledIterations) + L->setLoopAlreadyUnrolled(); + return LoopUnrollResult::PartiallyUnrolled; + } + return LoopUnrollResult::Unmodified; + } + // Save loop properties before it is transformed. 
MDNode *OrigLoopID = L->getLoopID(); @@ -1166,7 +1192,7 @@ LoopUnrollResult UnrollResult = UnrollLoop( L, {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder, + UseUpperBound, MaxOrZero, TripMultiple, UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) @@ -1195,10 +1221,7 @@ // If loop has an unroll count pragma or unrolled by explicitly set count // mark loop as unrolled to prevent unrolling beyond that requested. - // If the loop was peeled, we already "used up" the profile information - // we had, so we don't want to unroll or peel again. - if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || (PP.PeelProfiledIterations && PP.PeelCount))) + if (UnrollResult != LoopUnrollResult::FullyUnrolled && IsCountSetExplicitly) L->setLoopAlreadyUnrolled(); return UnrollResult; Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -59,7 +59,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/LoopPeel.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -269,9 +268,6 @@ /// runtime-unroll the loop if computing RuntimeTripCount will be expensive and /// AllowExpensiveTripCount is false. /// -/// If we want to perform PGO-based loop peeling, PeelCount is set to the -/// number of iterations we want to peel off. -/// /// The LoopInfo Analysis that is passed will be kept consistent. /// /// This utility preserves LoopInfo. 
It will also preserve ScalarEvolution and @@ -321,7 +317,7 @@ ULO.Count = ULO.TripCount; // Don't enter the unroll code if there is nothing to do. - if (ULO.TripCount == 0 && ULO.Count < 2 && ULO.PeelCount == 0) { + if (ULO.TripCount == 0 && ULO.Count < 2) { LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); return LoopUnrollResult::Unmodified; } @@ -339,28 +335,6 @@ bool RuntimeTripCount = (ULO.TripCount == 0 && ULO.Count > 0 && ULO.AllowRuntime); - assert((!RuntimeTripCount || !ULO.PeelCount) && - "Did not expect runtime trip-count unrolling " - "and peeling for the same loop"); - - bool Peeled = false; - if (ULO.PeelCount) { - Peeled = peelLoop(L, ULO.PeelCount, LI, SE, DT, AC, PreserveLCSSA); - - // Successful peeling may result in a change in the loop preheader/trip - // counts. If we later unroll the loop, we want these to be updated. - if (Peeled) { - // According to our guards and profitability checks the only - // meaningful exit should be latch block. Other exits go to deopt, - // so we do not worry about them. - BasicBlock *ExitingBlock = L->getLoopLatch(); - assert(ExitingBlock && "Loop without exiting block?"); - assert(L->isLoopExiting(ExitingBlock) && "Latch is not exiting?"); - ULO.TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - ULO.TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - } - // All these values should be taken only after peeling because they might have // changed. BasicBlock *Preheader = L->getLoopPreheader(); @@ -398,9 +372,6 @@ else if (BasicBlock *ExitingBlock = L->getExitingBlock()) ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); if (!LatchBI || (LatchBI->isConditional() && !LatchIsExiting)) { - // If the peeling guard is changed this assert may be relaxed or even - // deleted. 
- assert(!Peeled && "Peeling guard changed!"); LLVM_DEBUG( dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; @@ -468,16 +439,6 @@ << "completely unrolled loop with " << NV("UnrollCount", ULO.TripCount) << " iterations"; }); - } else if (ULO.PeelCount) { - LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() - << " with iteration count " << ULO.PeelCount << "!\n"); - if (ORE) - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), - L->getHeader()) - << " peeled loop by " << NV("PeelCount", ULO.PeelCount) - << " iterations"; - }); } else { auto DiagBuilder = [&]() { OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), @@ -839,8 +800,8 @@ // At this point, the code is well formed. We now simplify the unrolled loop, // doing constant propagation and dead code elimination as we go. - simplifyLoopAfterUnroll(L, !CompletelyUnroll && (ULO.Count > 1 || Peeled), LI, - SE, DT, AC, TTI); + simplifyLoopAfterUnroll(L, !CompletelyUnroll && ULO.Count > 1, LI, SE, DT, AC, + TTI); NumCompletelyUnrolled += CompletelyUnroll; ++NumUnrolled; Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -988,7 +988,7 @@ /*Force*/ false, /*AllowRuntime*/ false, /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, - /*PeelCount*/ 0, /*UnrollRemainder*/ false, ForgetAllSCEV}, + /*UnrollRemainder*/ false, ForgetAllSCEV}, LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); } Index: llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/peel-loop-and-unroll.ll @@ -0,0 +1,22 @@ +; RUN: not --crash opt -loop-unroll -unroll-peel-count=2 
-unroll-count=2 -S < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Cannot specify both explicit peel count and explicit unroll count + +@a = global [8 x i32] zeroinitializer, align 16 + +define void @test1() { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 %indvars.iv + %0 = trunc i64 %indvars.iv to i32 + store i32 %0, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 8 + br i1 %exitcond, label %for.body, label %for.exit + +for.exit: ; preds = %for.body + ret void +} Index: llvm/test/Transforms/LoopUnroll/pr33437.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/pr33437.ll +++ /dev/null @@ -1,99 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -loop-unroll -unroll-count=4 -unroll-peel-count=1 < %s | FileCheck %s - -declare zeroext i8 @patatino() - -define fastcc void @tinky() { -; CHECK-LABEL: @tinky( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[NEXT:%.*]] -; CHECK: loopexit: -; CHECK-NEXT: ret void -; CHECK: next: -; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] -; CHECK: loop.peel.begin: -; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] -; CHECK: loop.peel: -; CHECK-NEXT: [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: br i1 false, label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT:%.*]] -; CHECK: loop.peel.next: -; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] -; CHECK: loop.peel.next1: -; CHECK-NEXT: br label [[NEXT_PEEL_NEWPH:%.*]] -; CHECK: next.peel.newph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: br label [[LOOPEXIT]] -; -entry: - br label %next - -loopexit: - ret void - -next: - br label %loop - -loop: - %a = phi i8 
[ undef, %next ], [ %call593, %loop ] - %b = phi i32 [ 0, %next ], [ 1, %loop ] - %call593 = tail call zeroext i8 @patatino() - br i1 false, label %loop, label %loopexit -} - -define void @tinky2() { -; CHECK-LABEL: @tinky2( -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[NEXT:%.*]] -; CHECK: loopexit.loopexit: -; CHECK-NEXT: br label [[LOOPEXIT:%.*]] -; CHECK: loopexit: -; CHECK-NEXT: ret void -; CHECK: next: -; CHECK-NEXT: br label [[LOOP_PEEL_BEGIN:%.*]] -; CHECK: loop.peel.begin: -; CHECK-NEXT: br label [[LOOP_PEEL:%.*]] -; CHECK: loop.peel: -; CHECK-NEXT: [[CALL593_PEEL:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_PEEL:%.*]] = add i32 0, 1 -; CHECK-NEXT: [[COND_PEEL:%.*]] = icmp ne i32 0, 30 -; CHECK-NEXT: br i1 [[COND_PEEL]], label [[LOOP_PEEL_NEXT:%.*]], label [[LOOPEXIT]] -; CHECK: loop.peel.next: -; CHECK-NEXT: br label [[LOOP_PEEL_NEXT1:%.*]] -; CHECK: loop.peel.next1: -; CHECK-NEXT: br label [[NEXT_PEEL_NEWPH:%.*]] -; CHECK: next.peel.newph: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[B:%.*]] = phi i32 [ [[B_NEXT_PEEL]], [[NEXT_PEEL_NEWPH]] ], [ [[B_NEXT_3:%.*]], [[LOOP_2:%.*]] ] -; CHECK-NEXT: [[CALL593:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT:%.*]] = add nuw nsw i32 [[B]], 1 -; CHECK-NEXT: [[CALL593_1:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_1:%.*]] = add nuw nsw i32 [[B_NEXT]], 1 -; CHECK-NEXT: [[COND_1:%.*]] = icmp ne i32 [[B_NEXT]], 30 -; CHECK-NEXT: br i1 [[COND_1]], label [[LOOP_2]], label [[LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop !0 -; CHECK: loop.2: -; CHECK-NEXT: [[CALL593_2:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_2:%.*]] = add nuw nsw i32 [[B_NEXT_1]], 1 -; CHECK-NEXT: [[CALL593_3:%.*]] = tail call zeroext i8 @patatino() -; CHECK-NEXT: [[B_NEXT_3]] = add nuw nsw i32 [[B_NEXT_2]], 1 -; CHECK-NEXT: br label [[LOOP]], !llvm.loop !2 -; -entry: - br label %next - -loopexit: - ret void - -next: - br label %loop - -loop: - %a = phi i8 [ 
undef, %next ], [ %call593, %loop ] - %b = phi i32 [ 0, %next ], [ %b.next, %loop ] - %call593 = tail call zeroext i8 @patatino() - %b.next = add i32 %b, 1 - %cond = icmp ne i32 %b, 30 - br i1 %cond, label %loop, label %loopexit -} Index: llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/pr45939-peel-count-and-complete-unroll.ll +++ /dev/null @@ -1,228 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -loop-unroll -unroll-peel-count=2 -S %s | FileCheck --check-prefix=PEEL2 %s -; RUN: opt -loop-unroll -unroll-peel-count=8 -S %s | FileCheck --check-prefix=PEEL8 %s -; RUN: opt -loop-unroll -unroll-peel-count=2 -unroll-count=2 -S %s | FileCheck --check-prefix=PEEL2UNROLL2 %s - -; Test case for PR45939. Make sure unroll count is adjusted when loop is peeled and unrolled. - -@a = global [8 x i32] zeroinitializer, align 16 - -define void @test1() { -; PEEL2-LABEL: @test1( -; PEEL2-NEXT: entry: -; PEEL2-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; PEEL2: for.body.peel.begin: -; PEEL2-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; PEEL2: for.body.peel: -; PEEL2-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0 -; PEEL2-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32 -; PEEL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1 -; PEEL2-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8 -; PEEL2-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]] -; PEEL2: for.body.peel.next: -; PEEL2-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; PEEL2: for.body.peel2: -; PEEL2-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]] -; PEEL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32 -; 
PEEL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4 -; PEEL2-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1 -; PEEL2-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8 -; PEEL2-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]] -; PEEL2: for.body.peel.next1: -; PEEL2-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]] -; PEEL2: for.body.peel.next6: -; PEEL2-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; PEEL2: entry.peel.newph: -; PEEL2-NEXT: br label [[FOR_BODY:%.*]] -; PEEL2: for.body: -; PEEL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL4]] -; PEEL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL4]] to i32 -; PEEL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 -; PEEL2-NEXT: store i32 3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 3), align 4 -; PEEL2-NEXT: store i32 4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 4), align 4 -; PEEL2-NEXT: store i32 5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 5), align 4 -; PEEL2-NEXT: store i32 6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 6), align 4 -; PEEL2-NEXT: store i32 7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 0, i64 7), align 4 -; PEEL2-NEXT: store i32 8, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @a, i64 1, i64 0), align 4 -; PEEL2-NEXT: store i32 9, i32* getelementptr ([8 x i32], [8 x i32]* @a, i64 1, i64 1), align 4 -; PEEL2-NEXT: br label [[FOR_EXIT]] -; PEEL2: for.exit: -; PEEL2-NEXT: ret void -; -; PEEL8-LABEL: @test1( -; PEEL8-NEXT: entry: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; PEEL8: for.body.peel.begin: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; PEEL8: for.body.peel: -; PEEL8-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0 -; PEEL8-NEXT: [[TMP0:%.*]] = trunc i64 0 
to i32 -; PEEL8-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1 -; PEEL8-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]] -; PEEL8: for.body.peel.next: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; PEEL8: for.body.peel2: -; PEEL8-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]] -; PEEL8-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32 -; PEEL8-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next1: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL7:%.*]] -; PEEL8: for.body.peel7: -; PEEL8-NEXT: [[ARRAYIDX_PEEL8:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL4]] -; PEEL8-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL4]] to i32 -; PEEL8-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX_PEEL8]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL9:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL4]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL10:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL9]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL10]], label [[FOR_BODY_PEEL_NEXT6:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next6: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL12:%.*]] -; PEEL8: for.body.peel12: -; PEEL8-NEXT: [[ARRAYIDX_PEEL13:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL9]] -; PEEL8-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL9]] to i32 -; PEEL8-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_PEEL13]], align 4 -; PEEL8-NEXT: 
[[INDVARS_IV_NEXT_PEEL14:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL9]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL15:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL14]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL15]], label [[FOR_BODY_PEEL_NEXT11:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next11: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL17:%.*]] -; PEEL8: for.body.peel17: -; PEEL8-NEXT: [[ARRAYIDX_PEEL18:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL14]] -; PEEL8-NEXT: [[TMP4:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL14]] to i32 -; PEEL8-NEXT: store i32 [[TMP4]], i32* [[ARRAYIDX_PEEL18]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL19:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL14]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL20:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL19]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL20]], label [[FOR_BODY_PEEL_NEXT16:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next16: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL22:%.*]] -; PEEL8: for.body.peel22: -; PEEL8-NEXT: [[ARRAYIDX_PEEL23:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL19]] -; PEEL8-NEXT: [[TMP5:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL19]] to i32 -; PEEL8-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX_PEEL23]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL24:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL19]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL25:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL24]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL25]], label [[FOR_BODY_PEEL_NEXT21:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next21: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL27:%.*]] -; PEEL8: for.body.peel27: -; PEEL8-NEXT: [[ARRAYIDX_PEEL28:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL24]] -; PEEL8-NEXT: [[TMP6:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL24]] to i32 -; PEEL8-NEXT: store i32 [[TMP6]], i32* [[ARRAYIDX_PEEL28]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL29:%.*]] = add nuw nsw 
i64 [[INDVARS_IV_NEXT_PEEL24]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL30:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL29]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL30]], label [[FOR_BODY_PEEL_NEXT26:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next26: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL32:%.*]] -; PEEL8: for.body.peel32: -; PEEL8-NEXT: [[ARRAYIDX_PEEL33:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL29]] -; PEEL8-NEXT: [[TMP7:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL29]] to i32 -; PEEL8-NEXT: store i32 [[TMP7]], i32* [[ARRAYIDX_PEEL33]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_PEEL34:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL29]], 1 -; PEEL8-NEXT: [[EXITCOND_PEEL35:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL34]], 8 -; PEEL8-NEXT: br i1 [[EXITCOND_PEEL35]], label [[FOR_BODY_PEEL_NEXT31:%.*]], label [[FOR_EXIT]] -; PEEL8: for.body.peel.next31: -; PEEL8-NEXT: br label [[FOR_BODY_PEEL_NEXT36:%.*]] -; PEEL8: for.body.peel.next36: -; PEEL8-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; PEEL8: entry.peel.newph: -; PEEL8-NEXT: br label [[FOR_BODY:%.*]] -; PEEL8: for.body: -; PEEL8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL34]] -; PEEL8-NEXT: [[TMP8:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL34]] to i32 -; PEEL8-NEXT: store i32 [[TMP8]], i32* [[ARRAYIDX]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL34]], 1 -; PEEL8-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL8-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL8-NEXT: store i32 [[TMP9]], i32* [[ARRAYIDX_1]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; PEEL8-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_1]] -; PEEL8-NEXT: [[TMP10:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; PEEL8-NEXT: 
store i32 [[TMP10]], i32* [[ARRAYIDX_2]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; PEEL8-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_2]] -; PEEL8-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 -; PEEL8-NEXT: store i32 [[TMP11]], i32* [[ARRAYIDX_3]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; PEEL8-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_3]] -; PEEL8-NEXT: [[TMP12:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; PEEL8-NEXT: store i32 [[TMP12]], i32* [[ARRAYIDX_4]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; PEEL8-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_4]] -; PEEL8-NEXT: [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; PEEL8-NEXT: store i32 [[TMP13]], i32* [[ARRAYIDX_5]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; PEEL8-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_5]] -; PEEL8-NEXT: [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; PEEL8-NEXT: store i32 [[TMP14]], i32* [[ARRAYIDX_6]], align 4 -; PEEL8-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; PEEL8-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_6]] -; PEEL8-NEXT: [[TMP15:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; PEEL8-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX_7]], align 4 -; PEEL8-NEXT: br label [[FOR_EXIT]] -; PEEL8: for.exit: -; PEEL8-NEXT: ret void -; -; PEEL2UNROLL2-LABEL: @test1( -; PEEL2UNROLL2-NEXT: entry: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; PEEL2UNROLL2: for.body.peel.begin: -; PEEL2UNROLL2-NEXT: 
br label [[FOR_BODY_PEEL:%.*]] -; PEEL2UNROLL2: for.body.peel: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 0 -; PEEL2UNROLL2-NEXT: [[TMP0:%.*]] = trunc i64 0 to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP0]], i32* [[ARRAYIDX_PEEL]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL:%.*]] = add nuw nsw i64 0, 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_EXIT:%.*]] -; PEEL2UNROLL2: for.body.peel.next: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; PEEL2UNROLL2: for.body.peel2: -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_PEEL3:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT_PEEL]] -; PEEL2UNROLL2-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV_NEXT_PEEL]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP1]], i32* [[ARRAYIDX_PEEL3]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_PEEL4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_PEEL]], 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_PEEL4]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_EXIT]] -; PEEL2UNROLL2: for.body.peel.next1: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY_PEEL_NEXT6:%.*]] -; PEEL2UNROLL2: for.body.peel.next6: -; PEEL2UNROLL2-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; PEEL2UNROLL2: entry.peel.newph: -; PEEL2UNROLL2-NEXT: br label [[FOR_BODY:%.*]] -; PEEL2UNROLL2: for.body: -; PEEL2UNROLL2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PEEL4]], [[ENTRY_PEEL_NEWPH]] ], [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY]] ] -; PEEL2UNROLL2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV]] -; PEEL2UNROLL2-NEXT: [[TMP2:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 -; PEEL2UNROLL2-NEXT: 
[[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; PEEL2UNROLL2-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 [[INDVARS_IV_NEXT]] -; PEEL2UNROLL2-NEXT: [[TMP3:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; PEEL2UNROLL2-NEXT: store i32 [[TMP3]], i32* [[ARRAYIDX_1]], align 4 -; PEEL2UNROLL2-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; PEEL2UNROLL2-NEXT: [[EXITCOND_1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT_1]], 8 -; PEEL2UNROLL2-NEXT: br i1 [[EXITCOND_1]], label [[FOR_BODY]], label [[FOR_EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; PEEL2UNROLL2: for.exit.loopexit: -; PEEL2UNROLL2-NEXT: br label [[FOR_EXIT]] -; PEEL2UNROLL2: for.exit: -; PEEL2UNROLL2-NEXT: ret void -; -entry: - br label %for.body - -for.body: ; preds = %entry, %for.body - %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] - %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* @a, i64 0, i64 %indvars.iv - %0 = trunc i64 %indvars.iv to i32 - store i32 %0, i32* %arrayidx, align 4 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp ne i64 %indvars.iv.next, 8 - br i1 %exitcond, label %for.body, label %for.exit - -for.exit: ; preds = %for.body - ret void -} Index: llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll +++ llvm/test/Transforms/LoopUnroll/wrong_assert_in_peeling.ll @@ -39,13 +39,13 @@ ; CHECK-NEXT: [[TMP4]] = add nsw i32 [[TMP3]], [[TMP]] ; CHECK-NEXT: br label [[BB5:%.*]] ; CHECK: bb5: -; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 undef, label [[BB7:%.*]], label [[BB15_LOOPEXIT:%.*]] ; CHECK: bb7: ; CHECK-NEXT: br i1 undef, label [[BB10:%.*]], label [[BB10]] ; CHECK: bb10: -; CHECK-NEXT: br i1 false, label [[BB12]], label [[BB17_LOOPEXIT:%.*]] +; CHECK-NEXT: br i1 undef, label 
[[BB12]], label [[BB17_LOOPEXIT:%.*]] ; CHECK: bb12: -; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop !0 +; CHECK-NEXT: br i1 false, label [[BB13_LOOPEXIT:%.*]], label [[BB2]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: bb13.loopexit: ; CHECK-NEXT: br label [[BB13]] ; CHECK: bb13: