Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -71,7 +71,7 @@ void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount); + unsigned &TripCount, ScalarEvolution &SE); bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -794,7 +794,7 @@ } // 4th priority is loop peeling - computePeelCount(L, LoopSize, UP, TripCount); + computePeelCount(L, LoopSize, UP, TripCount, SE); if (UP.PeelCount) { UP.Runtime = false; UP.Count = 1; Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" @@ -30,6 +31,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -46,6 +48,7 @@ #include using namespace llvm; +using namespace llvm::PatternMatch; #define DEBUG_TYPE "loop-unroll" @@ -136,10 +139,71 @@ return ToInvariance; } +// Return the number of iterations to peel off that make conditions in the +// body true/false. 
For example, if we peel 2 iterations off the loop below,
+// the condition i < 2 can be evaluated at compile time.
+//  for (i = 0; i < n; i++) {
+//    if (i < 2)
+//      ..
+//    else
+//      ..
+//  }
+// Only compares of an affine AddRec of L with a non-AddRec are considered.
+static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+                                         ScalarEvolution &SE) {
+  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
+  unsigned DesiredPeelCount = 0;
+  for (auto *BB : L.blocks()) {
+    auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+    // Ignore unconditional branches and the loop exit condition (latch).
+    if (!BI || BI->isUnconditional() || L.getLoopLatch() == BB)
+      continue;
+    Value *LeftVal, *RightVal;
+    CmpInst::Predicate Pred;
+    if (!match(BI->getCondition(),
+               m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
+      continue;
+
+    const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
+    const SCEV *RightSCEV = SE.getSCEV(RightVal);
+    // Check if we have a condition with one AddRec and one non AddRec
+    // expression. Normalize LeftSCEV to be the AddRec.
+    if (!isa<SCEVAddRecExpr>(LeftSCEV)) {
+      if (!isa<SCEVAddRecExpr>(RightSCEV))
+        continue;
+      std::swap(LeftSCEV, RightSCEV);
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+    } else if (isa<SCEVAddRecExpr>(RightSCEV))
+      continue;
+
+    // Stepping by a fixed amount below is only valid for an affine AddRec,
+    // and evaluating at an iteration of L only makes sense for an AddRec of L.
+    const auto *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
+    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
+      continue;
+    // Check if extending DesiredPeelCount lets us evaluate Pred. The iteration
+    // number is built in the AddRec's own type, so no conversion is needed.
+    const SCEV *IterVal = LeftAR->evaluateAtIteration(
+        SE.getConstant(LeftAR->getType(), DesiredPeelCount), SE);
+    // NOTE(review): this is the swapped predicate, not the inverse; for
+    // `i > 2` the count stops at 2, leaving iteration 2 undecided - confirm.
+    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+      Pred = ICmpInst::getSwappedPredicate(Pred);
+    const SCEV *Step = LeftAR->getStepRecurrence(SE);
+    while (DesiredPeelCount < MaxPeelCount &&
+           SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
+      IterVal = SE.getAddExpr(IterVal, Step);
+      DesiredPeelCount++;
+    }
+  }
+
+  return DesiredPeelCount;
+}
+
 // Return the number of iterations we want to peel off.
void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount) { + unsigned &TripCount, ScalarEvolution &SE) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); UP.PeelCount = 0; if (!canPeel(L)) @@ -170,10 +234,15 @@ if (ToInvariance != InfiniteIterationsToInvariance) DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance); } + + // Pay respect to limitations implied by loop size and the max peel count. + unsigned MaxPeelCount = UnrollPeelMaxCount; + MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + + DesiredPeelCount = std::max(DesiredPeelCount, + countToEliminateCompares(*L, MaxPeelCount, SE)); + if (DesiredPeelCount > 0) { - // Pay respect to limitations implied by loop size and the max peel count. - unsigned MaxPeelCount = UnrollPeelMaxCount; - MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); // Consider max peel count limitation. assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); Index: test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -0,0 +1,460 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -verify-dom-info | FileCheck %s + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. 
+define void @test1(i32 %k) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; CHECK: if.else.peel: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ult i32 [[INC_PEEL]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]] +; CHECK: if.else.peel4: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL6:%.*]] +; CHECK: if.then.peel5: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL6]] +; CHECK: for.inc.peel6: +; CHECK-NEXT: [[INC_PEEL7:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL8:%.*]] = icmp slt i32 [[INC_PEEL7]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL8]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT9:%.*]] +; CHECK: for.body.peel.next9: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL7]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], 2 +; CHECK-NEXT: br i1 
[[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !0 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Check we peel off the maximum number of iterations that make conditions true. 
+define void @test2(i32 %k) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; CHECK: if.else.peel: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[IF2_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[IF2_PEEL]] +; CHECK: if2.peel: +; CHECK-NEXT: [[CMP2_PEEL:%.*]] = icmp ult i32 0, 4 +; CHECK-NEXT: br i1 [[CMP2_PEEL]], label [[IF_THEN2_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then2.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ult i32 [[INC_PEEL]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]] +; CHECK: if.else.peel4: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[IF2_PEEL6:%.*]] +; CHECK: if.then.peel5: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[IF2_PEEL6]] +; CHECK: if2.peel6: +; CHECK-NEXT: [[CMP2_PEEL7:%.*]] = icmp ult i32 [[INC_PEEL]], 4 +; CHECK-NEXT: br i1 [[CMP2_PEEL7]], label [[IF_THEN2_PEEL8:%.*]], label [[FOR_INC_PEEL9:%.*]] +; CHECK: if.then2.peel8: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL9]] +; CHECK: for.inc.peel9: +; CHECK-NEXT: [[INC_PEEL10:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL11:%.*]] = icmp slt i32 [[INC_PEEL10]], [[K]] +; CHECK-NEXT: br i1 
[[CMP_PEEL11]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL13:%.*]] +; CHECK: for.body.peel13: +; CHECK-NEXT: [[CMP1_PEEL14:%.*]] = icmp ult i32 [[INC_PEEL10]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL14]], label [[IF_THEN_PEEL16:%.*]], label [[IF_ELSE_PEEL15:%.*]] +; CHECK: if.else.peel15: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[IF2_PEEL17:%.*]] +; CHECK: if.then.peel16: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[IF2_PEEL17]] +; CHECK: if2.peel17: +; CHECK-NEXT: [[CMP2_PEEL18:%.*]] = icmp ult i32 [[INC_PEEL10]], 4 +; CHECK-NEXT: br i1 [[CMP2_PEEL18]], label [[IF_THEN2_PEEL19:%.*]], label [[FOR_INC_PEEL20:%.*]] +; CHECK: if.then2.peel19: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL20]] +; CHECK: for.inc.peel20: +; CHECK-NEXT: [[INC_PEEL21:%.*]] = add nsw i32 [[INC_PEEL10]], 1 +; CHECK-NEXT: [[CMP_PEEL22:%.*]] = icmp slt i32 [[INC_PEEL21]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL22]], label [[FOR_BODY_PEEL_NEXT12:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next12: +; CHECK-NEXT: br label [[FOR_BODY_PEEL24:%.*]] +; CHECK: for.body.peel24: +; CHECK-NEXT: [[CMP1_PEEL25:%.*]] = icmp ult i32 [[INC_PEEL21]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL25]], label [[IF_THEN_PEEL27:%.*]], label [[IF_ELSE_PEEL26:%.*]] +; CHECK: if.else.peel26: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[IF2_PEEL28:%.*]] +; CHECK: if.then.peel27: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[IF2_PEEL28]] +; CHECK: if2.peel28: +; CHECK-NEXT: [[CMP2_PEEL29:%.*]] = icmp ult i32 [[INC_PEEL21]], 4 +; CHECK-NEXT: br i1 [[CMP2_PEEL29]], label [[IF_THEN2_PEEL30:%.*]], label [[FOR_INC_PEEL31:%.*]] +; CHECK: if.then2.peel30: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL31]] +; CHECK: for.inc.peel31: +; CHECK-NEXT: [[INC_PEEL32:%.*]] = add nsw i32 [[INC_PEEL21]], 1 +; CHECK-NEXT: [[CMP_PEEL33:%.*]] = icmp slt i32 [[INC_PEEL32]], [[K]] 
+; CHECK-NEXT: br i1 [[CMP_PEEL33]], label [[FOR_BODY_PEEL_NEXT23:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next23: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT34:%.*]] +; CHECK: for.body.peel.next34: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL32]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], 2 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[IF2:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[IF2]] +; CHECK: if2: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i32 [[I_05]], 4 +; CHECK-NEXT: br i1 [[CMP2]], label [[IF_THEN2:%.*]], label [[FOR_INC]] +; CHECK: if.then2: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !2 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %if2 + +if.else: + call void @f2() + br label %if2 + +if2: + %cmp2 = icmp ult i32 %i.05, 4 + br i1 %cmp2, label %if.then2, label %for.inc + +if.then2: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Check that we can peel off iterations that make a condition false. 
+define void @test3(i32 %k) { +; CHECK-LABEL: @test3( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ugt i32 0, 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] +; CHECK: if.else.peel: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ugt i32 [[INC_PEEL]], 2 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL5:%.*]], label [[IF_ELSE_PEEL4:%.*]] +; CHECK: if.else.peel4: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC_PEEL6:%.*]] +; CHECK: if.then.peel5: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL6]] +; CHECK: for.inc.peel6: +; CHECK-NEXT: [[INC_PEEL7:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL8:%.*]] = icmp slt i32 [[INC_PEEL7]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL8]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT9:%.*]] +; CHECK: for.body.peel.next9: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL7]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[I_05]], 2 +; CHECK-NEXT: br i1 
[[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: call void @f2() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !3 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ugt i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Test that we respect MaxPeelCount +define void @test4(i32 %k) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ugt i32 0, 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ugt i32 [[INC_PEEL]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL4:%.*]], label 
[[FOR_INC_PEEL5:%.*]] +; CHECK: if.then.peel4: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL5]] +; CHECK: for.inc.peel5: +; CHECK-NEXT: [[INC_PEEL6:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL7:%.*]] = icmp slt i32 [[INC_PEEL6]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL7]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL9:%.*]] +; CHECK: for.body.peel9: +; CHECK-NEXT: [[CMP1_PEEL10:%.*]] = icmp ugt i32 [[INC_PEEL6]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL10]], label [[IF_THEN_PEEL11:%.*]], label [[FOR_INC_PEEL12:%.*]] +; CHECK: if.then.peel11: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL12]] +; CHECK: for.inc.peel12: +; CHECK-NEXT: [[INC_PEEL13:%.*]] = add nsw i32 [[INC_PEEL6]], 1 +; CHECK-NEXT: [[CMP_PEEL14:%.*]] = icmp slt i32 [[INC_PEEL13]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL14]], label [[FOR_BODY_PEEL_NEXT8:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next8: +; CHECK-NEXT: br label [[FOR_BODY_PEEL16:%.*]] +; CHECK: for.body.peel16: +; CHECK-NEXT: [[CMP1_PEEL17:%.*]] = icmp ugt i32 [[INC_PEEL13]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL17]], label [[IF_THEN_PEEL18:%.*]], label [[FOR_INC_PEEL19:%.*]] +; CHECK: if.then.peel18: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL19]] +; CHECK: for.inc.peel19: +; CHECK-NEXT: [[INC_PEEL20:%.*]] = add nsw i32 [[INC_PEEL13]], 1 +; CHECK-NEXT: [[CMP_PEEL21:%.*]] = icmp slt i32 [[INC_PEEL20]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL21]], label [[FOR_BODY_PEEL_NEXT15:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next15: +; CHECK-NEXT: br label [[FOR_BODY_PEEL23:%.*]] +; CHECK: for.body.peel23: +; CHECK-NEXT: [[CMP1_PEEL24:%.*]] = icmp ugt i32 [[INC_PEEL20]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL24]], label [[IF_THEN_PEEL25:%.*]], label [[FOR_INC_PEEL26:%.*]] +; CHECK: if.then.peel25: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL26]] +; 
CHECK: for.inc.peel26: +; CHECK-NEXT: [[INC_PEEL27:%.*]] = add nsw i32 [[INC_PEEL20]], 1 +; CHECK-NEXT: [[CMP_PEEL28:%.*]] = icmp slt i32 [[INC_PEEL27]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL28]], label [[FOR_BODY_PEEL_NEXT22:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next22: +; CHECK-NEXT: br label [[FOR_BODY_PEEL30:%.*]] +; CHECK: for.body.peel30: +; CHECK-NEXT: [[CMP1_PEEL31:%.*]] = icmp ugt i32 [[INC_PEEL27]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL31]], label [[IF_THEN_PEEL32:%.*]], label [[FOR_INC_PEEL33:%.*]] +; CHECK: if.then.peel32: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL33]] +; CHECK: for.inc.peel33: +; CHECK-NEXT: [[INC_PEEL34:%.*]] = add nsw i32 [[INC_PEEL27]], 1 +; CHECK-NEXT: [[CMP_PEEL35:%.*]] = icmp slt i32 [[INC_PEEL34]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL35]], label [[FOR_BODY_PEEL_NEXT29:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next29: +; CHECK-NEXT: br label [[FOR_BODY_PEEL37:%.*]] +; CHECK: for.body.peel37: +; CHECK-NEXT: [[CMP1_PEEL38:%.*]] = icmp ugt i32 [[INC_PEEL34]], 9999 +; CHECK-NEXT: br i1 [[CMP1_PEEL38]], label [[IF_THEN_PEEL39:%.*]], label [[FOR_INC_PEEL40:%.*]] +; CHECK: if.then.peel39: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL40]] +; CHECK: for.inc.peel40: +; CHECK-NEXT: [[INC_PEEL41:%.*]] = add nsw i32 [[INC_PEEL34]], 1 +; CHECK-NEXT: [[CMP_PEEL42:%.*]] = icmp slt i32 [[INC_PEEL41]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL42]], label [[FOR_BODY_PEEL_NEXT36:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next36: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT43:%.*]] +; CHECK: for.body.peel.next43: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL41]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[I_05]], 9999 +; CHECK-NEXT: br i1 [[CMP1]], label 
[[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !4 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ugt i32 %i.05, 9999 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +}