diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -494,6 +494,8 @@ bool UpperBound; /// Allow peeling off loop iterations. bool AllowPeeling; + /// Allow peeling off loop iterations for loop nests. + bool AllowLoopNestsPeeling; /// Allow unrolling of all the iterations of the runtime loop remainder. bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -24,6 +24,7 @@ #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/IVDescriptors.h" +#include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" @@ -31,6 +32,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/ValueMapper.h" namespace llvm { @@ -426,6 +428,12 @@ /// already reversed loops in LI. /// FIXME: Consider changing the order in LoopInfo. void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &); + +/// Recursively clone the specified loop and all of its children, +/// mapping the blocks with the specified map. 
+Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -154,6 +154,10 @@ cl::desc("Allows loops to be peeled when the dynamic " "trip count is known to be low.")); +static cl::opt<bool> UnrollAllowLoopNestsPeeling( + "unroll-allow-loop-nests-peeling", cl::init(false), cl::Hidden, + cl::desc("Allows loop nests to be peeled.")); + static cl::opt<bool> UnrollUnrollRemainder( "unroll-remainder", cl::Hidden, cl::desc("Allow the loop remainder to be unrolled.")); @@ -215,6 +219,7 @@ UP.Force = false; UP.UpperBound = false; UP.AllowPeeling = true; + UP.AllowLoopNestsPeeling = false; UP.UnrollAndJam = false; UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; @@ -255,6 +260,8 @@ UP.UpperBound = false; if (UnrollAllowPeeling.getNumOccurrences() > 0) UP.AllowPeeling = UnrollAllowPeeling; + if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0) + UP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling; if (UnrollUnrollRemainder.getNumOccurrences() > 0) UP.UnrollRemainder = UnrollUnrollRemainder; diff --git a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -903,30 +903,6 @@ return true; } -/// Recursively clone the specified loop and all of its children, -/// mapping the blocks with the specified map. -static Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, - LPPassManager *LPM) { - Loop &New = *LI->AllocateLoop(); - if (PL) - PL->addChildLoop(&New); - else - LI->addTopLevelLoop(&New); - LPM->addLoop(New); - - // Add all of the blocks in L to the new loop. 
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - if (LI->getLoopFor(*I) == L) - New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI); - - // Add all of the subloops to the new loop. - for (Loop *I : *L) - cloneLoop(I, &New, VM, LI, LPM); - - return &New; -} - /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, /// otherwise branch to FalseDest. Insert the code immediately before OldBranch /// and remove (but not erase!) it from the function. diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -289,8 +289,10 @@ if (!canPeel(L)) return; - // Only try to peel innermost loops. - if (!L->empty()) + // Only try to peel innermost loops by default. + // The constraint can be relaxed by the target in TTI.getUnrollingPreferences + // or by the flag -unroll-allow-loop-nests-peeling. + if (!UP.AllowLoopNestsPeeling && !L->empty()) return; // If the user provided a peel count, use that. @@ -508,7 +510,10 @@ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F); NewBlocks.push_back(NewBB); - if (ParentLoop) + // If an original block is an immediate child of the loop L, its copy + // is a child of a ParentLoop after peeling. If a block is a child of + // a nested loop, it is handled in the cloneLoop() call below. + if (ParentLoop && LI->getLoopFor(*BB) == L) ParentLoop->addBasicBlockToLoop(NewBB, *LI); VMap[*BB] = NewBB; @@ -525,6 +530,12 @@ } } + // Recursively create the new Loop objects for nested loops, if any, + // to preserve LoopInfo. + for (Loop *ChildLoop : *L) { + cloneLoop(ChildLoop, ParentLoop, VMap, LI, nullptr); + } + // Hook-up the control flow for the newly inserted blocks. 
// The new header is hooked up directly to the "top", which is either // the original loop preheader (for the first iteration) or the previous diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -1499,3 +1499,27 @@ SmallPriorityWorklist<Loop *, 4> &Worklist) { appendReversedLoopsToWorklist(LI, Worklist); } + +Loop *llvm::cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, + LoopInfo *LI, LPPassManager *LPM) { + Loop &New = *LI->AllocateLoop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + + if (LPM) + LPM->addLoop(New); + + // Add all of the blocks in L to the new loop. + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + if (LI->getLoopFor(*I) == L) + New.addBasicBlockToLoop(cast<BasicBlock>(VM[*I]), *LI); + + // Add all of the subloops to the new loop. + for (Loop *I : *L) + cloneLoop(I, &New, VM, LI, LPM); + + return &New; +} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -403,76 +403,11 @@ ret void } -; In this case we cannot peel the inner loop, because the condition involves -; the outer induction variable. 
-define void @test5(i32 %k) { -; CHECK-LABEL: @test5( -; CHECK-NEXT: for.body.lr.ph: -; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] -; CHECK: outer.header: -; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: call void @f2() -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] -; CHECK: outer.inc: -; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1 -; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] -; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -for.body.lr.ph: - br label %outer.header - -outer.header: - %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ] - br label %for.body - -for.body: - %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ] - %cmp1 = icmp ult i32 %j, 2 - br i1 %cmp1, label %if.then, label %if.else - -if.then: - call void @f1() - br label %for.inc - -if.else: - call void @f2() - br label %for.inc - -for.inc: - %inc = add nsw i32 %i.05, 1 - %cmp = icmp slt i32 %inc, %k - br i1 %cmp, label %for.body, label %outer.inc - -outer.inc: - %j.inc = add nsw i32 %j, 1 - %outer.cmp = icmp slt i32 %j.inc, %k - br i1 %outer.cmp, label %outer.header, label %for.end - - -for.end: - ret void -} - ; In this test, the condition involves 2 AddRecs. 
Without evaluating both ; AddRecs, we cannot prove that the condition becomes known in the loop body ; after peeling. -define void @test6(i32 %k) { -; CHECK-LABEL: @test6( +define void @test5(i32 %k) { +; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -521,8 +456,8 @@ ret void } -define void @test7(i32 %k) { -; CHECK-LABEL: @test7( +define void @test6(i32 %k) { +; CHECK-LABEL: @test6( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] ; CHECK: for.body.peel.begin: @@ -615,8 +550,8 @@ ret void } -define void @test8(i32 %k) { -; CHECK-LABEL: @test8( +define void @test7(i32 %k) { +; CHECK-LABEL: @test7( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] ; CHECK: for.body.peel.begin: @@ -711,8 +646,8 @@ ; Comparison with non-monotonic predicate due to possible wrapping, loop ; body cannot be simplified. -define void @test9(i32 %k) { -; CHECK-LABEL: @test9( +define void @test8(i32 %k) { +; CHECK-LABEL: @test8( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -751,8 +686,8 @@ } ; CHECK-NOT: llvm.loop.unroll.disable -define void @test_10__peel_first_iter_via_slt_pred(i32 %len) { -; CHECK-LABEL: @test_10__peel_first_iter_via_slt_pred( +define void @test_9__peel_first_iter_via_slt_pred(i32 %len) { +; CHECK-LABEL: @test_9__peel_first_iter_via_slt_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -818,8 +753,8 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } -define void @test_11__peel_first_iter_via_sgt_pred(i32 %len) { -; CHECK-LABEL: @test_11__peel_first_iter_via_sgt_pred( +define void @test_10__peel_first_iter_via_sgt_pred(i32 %len) { +; CHECK-LABEL: @test_10__peel_first_iter_via_sgt_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; 
CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -887,8 +822,8 @@ ; NOTE: here we should only peel the first iteration, ; i.e. all calls to sink() must stay in loop. -define void @test12__peel_first_iter_via_eq_pred(i32 %len) { -; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred( +define void @test11__peel_first_iter_via_eq_pred(i32 %len) { +; CHECK-LABEL: @test11__peel_first_iter_via_eq_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -956,8 +891,8 @@ ; NOTE: here we should only peel the first iteration, ; i.e. all calls to sink() must stay in loop. -define void @test13__peel_first_iter_via_ne_pred(i32 %len) { -; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred( +define void @test12__peel_first_iter_via_ne_pred(i32 %len) { +; CHECK-LABEL: @test12__peel_first_iter_via_ne_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1024,8 +959,8 @@ } ; No peeling is profitable here. -define void @test14__ivar_mod2_is_1(i32 %len) { -; CHECK-LABEL: @test14__ivar_mod2_is_1( +define void @test13__ivar_mod2_is_1(i32 %len) { +; CHECK-LABEL: @test13__ivar_mod2_is_1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1074,8 +1009,8 @@ } ; No peeling is profitable here. 
-define void @test15__ivar_mod2_is_0(i32 %len) { -; CHECK-LABEL: @test15__ivar_mod2_is_0( +define void @test14__ivar_mod2_is_0(i32 %len) { +; CHECK-LABEL: @test14__ivar_mod2_is_0( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] @@ -1123,10 +1058,10 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } -; Similar to @test7, we need to peel one extra iteration, and we can't do that +; Similar to @test6, we need to peel one extra iteration, and we can't do that ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. -define void @test16(i32 %k) { -; CHECK-LABEL: @test16( +define void @test15(i32 %k) { +; CHECK-LABEL: @test15( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -1164,10 +1099,10 @@ ret void } -; Similar to @test8, we need to peel one extra iteration, and we can't do that +; Similar to @test7, we need to peel one extra iteration, and we can't do that ; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. 
-define void @test17(i32 %k) { -; CHECK-LABEL: @test17( +define void @test16(i32 %k) { +; CHECK-LABEL: @test16( ; CHECK-NEXT: for.body.lr.ph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-nests.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -unroll-allow-loop-nests-peeling -verify-dom-info | FileCheck %s --check-prefix PEELED + +declare void @f1() +declare void @f2() + +; In this case we cannot peel the inner loop, because the condition involves +; the outer induction variable. +; Peel the loop nest if allowed by the flag -unroll-allow-loop-nests-peeling. +define void @test1(i32 %k) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[J]], 2 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] +; CHECK: outer.inc: +; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1 +; CHECK-NEXT: [[OUTER_CMP:%.*]] = icmp slt 
i32 [[J_INC]], [[K]] +; CHECK-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END:%.*]], !llvm.loop !{{.*}} +; CHECK: for.end: +; CHECK-NEXT: ret void +; +; PEELED-LABEL: @test1( +; PEELED-NEXT: for.body.lr.ph: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_BEGIN:%.*]] +; PEELED: outer.header.peel.begin: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL:%.*]] +; PEELED: outer.header.peel: +; PEELED-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; PEELED: for.body.peel: +; PEELED-NEXT: [[I_05_PEEL:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL]] ], [ [[INC_PEEL:%.*]], [[FOR_INC_PEEL:%.*]] ] +; PEELED-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; PEELED-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; PEELED: if.else.peel: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC_PEEL]] +; PEELED: if.then.peel: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC_PEEL]] +; PEELED: for.inc.peel: +; PEELED-NEXT: [[INC_PEEL]] = add nsw i32 [[I_05_PEEL]], 1 +; PEELED-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; PEELED-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL]], label [[OUTER_INC_PEEL:%.*]] +; PEELED: outer.inc.peel: +; PEELED-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 0, 1 +; PEELED-NEXT: [[OUTER_CMP_PEEL:%.*]] = icmp slt i32 [[J_INC_PEEL]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL]], label [[OUTER_HEADER_PEEL_NEXT:%.*]], label [[FOR_END:%[^,]*]] +; Verify that MD_loop metadata is dropped. 
+; PEELED-NOT: , !llvm.loop !{{[0-9]*}} +; PEELED: outer.header.peel.next: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL2:%.*]] +; PEELED: outer.header.peel2: +; PEELED-NEXT: br label [[FOR_BODY_PEEL3:%.*]] +; PEELED: for.body.peel3: +; PEELED-NEXT: [[I_05_PEEL4:%.*]] = phi i32 [ 0, [[OUTER_HEADER_PEEL2]] ], [ [[INC_PEEL9:%.*]], [[FOR_INC_PEEL8:%.*]] ] +; PEELED-NEXT: [[CMP1_PEEL5:%.*]] = icmp ult i32 [[J_INC_PEEL]], 2 +; PEELED-NEXT: br i1 [[CMP1_PEEL5]], label [[IF_THEN_PEEL7:%.*]], label [[IF_ELSE_PEEL6:%.*]] +; PEELED: if.else.peel6: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC_PEEL8]] +; PEELED: if.then.peel7: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC_PEEL8]] +; PEELED: for.inc.peel8: +; PEELED-NEXT: [[INC_PEEL9]] = add nsw i32 [[I_05_PEEL4]], 1 +; PEELED-NEXT: [[CMP_PEEL10:%.*]] = icmp slt i32 [[INC_PEEL9]], [[K]] +; PEELED-NEXT: br i1 [[CMP_PEEL10]], label [[FOR_BODY_PEEL3]], label [[OUTER_INC_PEEL11:%.*]] +; PEELED: outer.inc.peel11: +; PEELED-NEXT: [[J_INC_PEEL12:%.*]] = add nsw i32 [[J_INC_PEEL]], 1 +; PEELED-NEXT: [[OUTER_CMP_PEEL13:%.*]] = icmp slt i32 [[J_INC_PEEL12]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP_PEEL13]], label [[OUTER_HEADER_PEEL_NEXT1:%.*]], label [[FOR_END]] +; Verify that MD_loop metadata is dropped. 
+; PEELED-NOT: , !llvm.loop !{{[0-9]*}} +; PEELED: outer.header.peel.next1: +; PEELED-NEXT: br label [[OUTER_HEADER_PEEL_NEXT14:%.*]] +; PEELED: outer.header.peel.next14: +; PEELED-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; PEELED: for.body.lr.ph.peel.newph: +; PEELED-NEXT: br label [[OUTER_HEADER:%.*]] +; PEELED: outer.header: +; PEELED-NEXT: [[J:%.*]] = phi i32 [ [[J_INC_PEEL12]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[J_INC:%.*]], [[OUTER_INC:%.*]] ] +; PEELED-NEXT: br label [[FOR_BODY:%.*]] +; PEELED: for.body: +; PEELED-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; PEELED-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; PEELED: if.then: +; PEELED-NEXT: call void @f1() +; PEELED-NEXT: br label [[FOR_INC]] +; PEELED: if.else: +; PEELED-NEXT: call void @f2() +; PEELED-NEXT: br label [[FOR_INC]] +; PEELED: for.inc: +; PEELED-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; PEELED-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; PEELED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[OUTER_INC]] +; PEELED: outer.inc: +; PEELED-NEXT: [[J_INC]] = add nuw nsw i32 [[J]], 1 +; PEELED-NEXT: [[OUTER_CMP:%.*]] = icmp slt i32 [[J_INC]], [[K]] +; PEELED-NEXT: br i1 [[OUTER_CMP]], label [[OUTER_HEADER]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !{{.*}} +; PEELED: for.end.loopexit: +; PEELED-NEXT: br label [[FOR_END]] +; PEELED: for.end: +; PEELED-NEXT: ret void +; +for.body.lr.ph: + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %for.body.lr.ph ], [ %j.inc, %outer.inc ] + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %outer.header ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %j, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %outer.inc + +outer.inc: + %j.inc = add nsw i32 %j, 1 + 
%outer.cmp = icmp slt i32 %j.inc, %k + br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0}