diff --git a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp --- a/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -215,11 +215,10 @@ // consider AddRecs of the loop we are trying to peel and avoid // non-monotonic predicates, as we will not be able to simplify the loop // body. - // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can - // simplify the loop, if we peel 1 additional iteration, if there - // is no wrapping. + if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) + continue; bool Increasing; - if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || + if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) && !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) continue; (void)Increasing; @@ -238,18 +237,41 @@ Pred = ICmpInst::getInversePredicate(Pred); const SCEV *Step = LeftAR->getStepRecurrence(SE); - while (NewPeelCount < MaxPeelCount && - SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { - IterVal = SE.getAddExpr(IterVal, Step); + const SCEV *NextIterVal = SE.getAddExpr(IterVal, Step); + auto PeelOneMoreIteration = [&]() { + IterVal = NextIterVal; + NextIterVal = SE.getAddExpr(IterVal, Step); NewPeelCount++; + }; + + auto CanPeelOneMoreIteration = [&]() { + return NewPeelCount < MaxPeelCount; + }; + + while (CanPeelOneMoreIteration() && + SE.isKnownPredicate(Pred, IterVal, RightSCEV)) + PeelOneMoreIteration(); + + // With *that* peel count, does the predicate !Pred becomes known in the + // first iteration of the loop body after peeling? + if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, + RightSCEV)) + continue; // If not, we're done here. + + // However, for equality comparisons, that isn't always sufficient to + // eliminate the comparsion in loop body, we may need to peel one more + // iteration. See if Pred then becomes known again, and !Pred unknown. + if (ICmpInst::isEquality(Pred) && + !SE.isKnownPredicate(Pred, IterVal, RightSCEV) && + SE.isKnownPredicate(Pred, NextIterVal, RightSCEV) && + !SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), NextIterVal, + RightSCEV)) { + if (!CanPeelOneMoreIteration()) + continue; // Need to peel one more iteration, but can't. Give up. + PeelOneMoreIteration(); // Great! } - // Only peel the loop if the monotonic predicate !Pred becomes known in the - // first iteration of the loop body after peeling. - if (NewPeelCount > DesiredPeelCount && - SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, - RightSCEV)) - DesiredPeelCount = NewPeelCount; + DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount); } return DesiredPeelCount; diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll --- a/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -loop-unroll -verify-dom-info | FileCheck %s +; RUN: opt < %s -S -loop-unroll -unroll-peel-max-count=4 -verify-dom-info | FileCheck %s declare void @f1() declare void @f2() @@ -522,22 +522,75 @@ } define void @test7(i32 %k) { -; FIXME: Could simplify loop body by peeling one additional iteration after -; i != 3 becomes false ; CHECK-LABEL: @test7( ; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ne i32 0, 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ne i32 [[INC_PEEL]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL4:%.*]], label [[FOR_INC_PEEL5:%.*]] +; CHECK: if.then.peel4: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL5]] +; CHECK: for.inc.peel5: +; CHECK-NEXT: [[INC_PEEL6:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL7:%.*]] = icmp slt i32 [[INC_PEEL6]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL7]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL9:%.*]] +; CHECK: for.body.peel9: +; CHECK-NEXT: [[CMP1_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL6]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL10]], label [[IF_THEN_PEEL11:%.*]], label [[FOR_INC_PEEL12:%.*]] +; CHECK: if.then.peel11: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL12]] +; CHECK: for.inc.peel12: +; CHECK-NEXT: [[INC_PEEL13:%.*]] = add nsw i32 [[INC_PEEL6]], 1 +; CHECK-NEXT: [[CMP_PEEL14:%.*]] = icmp slt i32 [[INC_PEEL13]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL14]], label [[FOR_BODY_PEEL_NEXT8:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next8: +; CHECK-NEXT: br label [[FOR_BODY_PEEL16:%.*]] +; CHECK: for.body.peel16: +; CHECK-NEXT: [[CMP1_PEEL17:%.*]] = icmp ne i32 [[INC_PEEL13]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL17]], label [[IF_THEN_PEEL18:%.*]], label [[FOR_INC_PEEL19:%.*]] +; CHECK: if.then.peel18: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL19]] +; CHECK: for.inc.peel19: +; CHECK-NEXT: [[INC_PEEL20:%.*]] = add nsw i32 [[INC_PEEL13]], 1 +; CHECK-NEXT: [[CMP_PEEL21:%.*]] = icmp slt i32 [[INC_PEEL20]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL21]], label [[FOR_BODY_PEEL_NEXT15:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next15: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT22:%.*]] +; CHECK: for.body.peel.next22: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_05]], 3 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL20]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: call void @f1() ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !6 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -563,22 +616,75 @@ } define void @test8(i32 %k) { -; FIXME: Could simplify loop body by peeling one additional iteration after -; i == 3 becomes true. ; CHECK-LABEL: @test8( ; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 0, 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL]] +; CHECK: for.inc.peel: +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 +; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp eq i32 [[INC_PEEL]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL4:%.*]], label [[FOR_INC_PEEL5:%.*]] +; CHECK: if.then.peel4: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL5]] +; CHECK: for.inc.peel5: +; CHECK-NEXT: [[INC_PEEL6:%.*]] = add nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[CMP_PEEL7:%.*]] = icmp slt i32 [[INC_PEEL6]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL7]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL9:%.*]] +; CHECK: for.body.peel9: +; CHECK-NEXT: [[CMP1_PEEL10:%.*]] = icmp eq i32 [[INC_PEEL6]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL10]], label [[IF_THEN_PEEL11:%.*]], label [[FOR_INC_PEEL12:%.*]] +; CHECK: if.then.peel11: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL12]] +; CHECK: for.inc.peel12: +; CHECK-NEXT: [[INC_PEEL13:%.*]] = add nsw i32 [[INC_PEEL6]], 1 +; CHECK-NEXT: [[CMP_PEEL14:%.*]] = icmp slt i32 [[INC_PEEL13]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL14]], label [[FOR_BODY_PEEL_NEXT8:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next8: +; CHECK-NEXT: br label [[FOR_BODY_PEEL16:%.*]] +; CHECK: for.body.peel16: +; CHECK-NEXT: [[CMP1_PEEL17:%.*]] = icmp eq i32 [[INC_PEEL13]], 3 +; CHECK-NEXT: br i1 [[CMP1_PEEL17]], label [[IF_THEN_PEEL18:%.*]], label [[FOR_INC_PEEL19:%.*]] +; CHECK: if.then.peel18: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC_PEEL19]] +; CHECK: for.inc.peel19: +; CHECK-NEXT: [[INC_PEEL20:%.*]] = add nsw i32 [[INC_PEEL13]], 1 +; CHECK-NEXT: [[CMP_PEEL21:%.*]] = icmp slt i32 [[INC_PEEL20]], [[K]] +; CHECK-NEXT: br i1 [[CMP_PEEL21]], label [[FOR_BODY_PEEL_NEXT15:%.*]], label [[FOR_END]] +; CHECK: for.body.peel.next15: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT22:%.*]] +; CHECK: for.body.peel.next22: +; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] +; CHECK: for.body.lr.ph.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_05]], 3 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL20]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: call void @f1() ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !7 +; CHECK: for.end.loopexit: +; CHECK-NEXT: br label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -687,7 +793,7 @@ ; CHECK-NEXT: call void @sink() ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !8 ; entry: %cmp5 = icmp sgt i32 %len, 0 @@ -754,7 +860,7 @@ ; CHECK-NEXT: call void @sink() ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !8 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !10 ; entry: %cmp5 = icmp sgt i32 %len, 0 @@ -779,21 +885,43 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; NOTE: here we should only peel the first iteration, +; i.e. all calls to sink() must stay in loop. define void @test12__peel_first_iter_via_eq_pred(i32 %len) { ; CHECK-LABEL: @test12__peel_first_iter_via_eq_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp eq i32 0, 0 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_END_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @init() +; CHECK-NEXT: br label [[IF_END_PEEL]] +; CHECK: if.end.peel: +; CHECK-NEXT: call void @sink() +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1 +; CHECK-NEXT: [[EXITCOND_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[LEN]] +; CHECK-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT1:%.*]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]] +; CHECK: for.body.preheader.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_06]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_END]] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END:%.*]] ], [ [[INC_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_END]] ; CHECK: if.then: ; CHECK-NEXT: call void @init() ; CHECK-NEXT: br label [[IF_END]] @@ -801,7 +929,7 @@ ; CHECK-NEXT: call void @sink() ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !11 ; entry: %cmp5 = icmp sgt i32 %len, 0 @@ -826,21 +954,43 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; NOTE: here we should only peel the first iteration, +; i.e. all calls to sink() must stay in loop. define void @test13__peel_first_iter_via_ne_pred(i32 %len) { ; CHECK-LABEL: @test13__peel_first_iter_via_ne_pred( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[LEN:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP5]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ne i32 0, 0 +; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_END_PEEL:%.*]], label [[IF_THEN_PEEL:%.*]] +; CHECK: if.then.peel: +; CHECK-NEXT: call void @init() +; CHECK-NEXT: br label [[IF_END_PEEL]] +; CHECK: if.end.peel: +; CHECK-NEXT: call void @sink() +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1 +; CHECK-NEXT: [[EXITCOND_PEEL:%.*]] = icmp eq i32 [[INC_PEEL]], [[LEN]] +; CHECK-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY_PEEL_NEXT:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT1:%.*]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PREHEADER_PEEL_NEWPH:%.*]] +; CHECK: for.body.preheader.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END:%.*]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_06]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_END]], label [[IF_THEN:%.*]] +; CHECK-NEXT: [[I_06:%.*]] = phi i32 [ [[INC:%.*]], [[IF_END:%.*]] ], [ [[INC_PEEL]], [[FOR_BODY_PREHEADER_PEEL_NEWPH]] ] +; CHECK-NEXT: br i1 true, label [[IF_END]], label [[IF_THEN:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @init() ; CHECK-NEXT: br label [[IF_END]] @@ -848,7 +998,7 @@ ; CHECK-NEXT: call void @sink() ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_06]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[LEN]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT_LOOPEXIT:%.*]], label [[FOR_BODY]], !llvm.loop !12 ; entry: %cmp5 = icmp sgt i32 %len, 0 @@ -873,6 +1023,7 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; No peeling is profitable here. define void @test15__ivar_mod2_is_1(i32 %len) { ; CHECK-LABEL: @test15__ivar_mod2_is_1( ; CHECK-NEXT: entry: @@ -922,6 +1073,7 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; No peeling is profitable here. define void @test16__ivar_mod2_is_0(i32 %len) { ; CHECK-LABEL: @test16__ivar_mod2_is_0( ; CHECK-NEXT: entry: @@ -971,5 +1123,87 @@ br i1 %exitcond, label %for.cond.cleanup, label %for.body } +; Similar to @test7, we need to peel one extra iteration, and we can't do that +; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. +define void @test17(i32 %k) { +; CHECK-LABEL: @test17( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_05]], 4 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ne i32 %i.05, 4 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Similar to @test8, we need to peel one extra iteration, and we can't do that +; as per the -unroll-peel-max-count=4, so this shouldn't be peeled at all. +define void @test18(i32 %k) { +; CHECK-LABEL: @test18( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_05]], 4 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp eq i32 %i.05, 4 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + declare void @init() declare void @sink()