Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -190,14 +190,25 @@ const SCEVAddRecExpr *LeftAR = cast(LeftSCEV); - // Avoid huge SCEV computations in the loop below and make sure we only - // consider AddRecs of the loop we are trying to peel. - if (!LeftAR->isAffine() || LeftAR->getLoop() != &L) + // Avoid huge SCEV computations in the loop below, make sure we only + // consider AddRecs of the loop we are trying to peel and avoid + // non-monotonic predicates, as we will not be able to simplify the loop + // body. + // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can + // simplify the loop, if we peel 1 additional iteration, if there + // is no wrapping. + bool Increasing; + if (!LeftAR->isAffine() || LeftAR->getLoop() != &L || + !SE.isMonotonicPredicate(LeftAR, Pred, Increasing)) continue; + (void)Increasing; + + // Check if extending the current DesiredPeelCount lets us evaluate Pred + // or !Pred in the loop body statically. + unsigned NewPeelCount = DesiredPeelCount; - // Check if extending DesiredPeelCount lets us evaluate Pred. const SCEV *IterVal = LeftAR->evaluateAtIteration( - SE.getConstant(LeftSCEV->getType(), DesiredPeelCount), SE); + SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE); // If the original condition is not known, get the negated predicate // (which holds on the else branch) and check if it is known. This allows @@ -206,11 +217,18 @@ Pred = ICmpInst::getInversePredicate(Pred); const SCEV *Step = LeftAR->getStepRecurrence(SE); - while (DesiredPeelCount < MaxPeelCount && + while (NewPeelCount < MaxPeelCount && SE.isKnownPredicate(Pred, IterVal, RightSCEV)) { IterVal = SE.getAddExpr(IterVal, Step); - DesiredPeelCount++; + NewPeelCount++; } + + // Only peel the loop if the monotonic predicate !Pred becomes known in the + // first iteration of the loop body after peeling. + if (NewPeelCount > DesiredPeelCount && + SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal, + RightSCEV)) + DesiredPeelCount = NewPeelCount; } return DesiredPeelCount; Index: llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ llvm/trunk/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -338,114 +338,23 @@ ret void } -; Test that we respect MaxPeelCount +; Test that we only peel off iterations if it simplifies a condition in the +; loop body after peeling at most MaxPeelCount iterations. define void @test4(i32 %k) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: for.body.lr.ph: -; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; CHECK: for.body.peel.begin: -; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; CHECK: for.body.peel: -; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ugt i32 0, 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[FOR_INC_PEEL:%.*]] -; CHECK: if.then.peel: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL]] -; CHECK: for.inc.peel: -; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 1 -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.peel.next: -; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] -; CHECK: for.body.peel2: -; CHECK-NEXT: [[CMP1_PEEL3:%.*]] = icmp ugt i32 [[INC_PEEL]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL3]], label [[IF_THEN_PEEL4:%.*]], label [[FOR_INC_PEEL5:%.*]] -; CHECK: if.then.peel4: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL5]] -; CHECK: for.inc.peel5: -; CHECK-NEXT: [[INC_PEEL6:%.*]] = add nsw i32 [[INC_PEEL]], 1 -; CHECK-NEXT: [[CMP_PEEL7:%.*]] = icmp slt i32 [[INC_PEEL6]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL7]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next1: -; CHECK-NEXT: br label [[FOR_BODY_PEEL9:%.*]] -; CHECK: for.body.peel9: -; CHECK-NEXT: [[CMP1_PEEL10:%.*]] = icmp ugt i32 [[INC_PEEL6]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL10]], label [[IF_THEN_PEEL11:%.*]], label [[FOR_INC_PEEL12:%.*]] -; CHECK: if.then.peel11: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL12]] -; CHECK: for.inc.peel12: -; CHECK-NEXT: [[INC_PEEL13:%.*]] = add nsw i32 [[INC_PEEL6]], 1 -; CHECK-NEXT: [[CMP_PEEL14:%.*]] = icmp slt i32 [[INC_PEEL13]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL14]], label [[FOR_BODY_PEEL_NEXT8:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next8: -; CHECK-NEXT: br label [[FOR_BODY_PEEL16:%.*]] -; CHECK: for.body.peel16: -; CHECK-NEXT: [[CMP1_PEEL17:%.*]] = icmp ugt i32 [[INC_PEEL13]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL17]], label [[IF_THEN_PEEL18:%.*]], label [[FOR_INC_PEEL19:%.*]] -; CHECK: if.then.peel18: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL19]] -; CHECK: for.inc.peel19: -; CHECK-NEXT: [[INC_PEEL20:%.*]] = add nsw i32 [[INC_PEEL13]], 1 -; CHECK-NEXT: [[CMP_PEEL21:%.*]] = icmp slt i32 [[INC_PEEL20]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL21]], label [[FOR_BODY_PEEL_NEXT15:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next15: -; CHECK-NEXT: br label [[FOR_BODY_PEEL23:%.*]] -; CHECK: for.body.peel23: -; CHECK-NEXT: [[CMP1_PEEL24:%.*]] = icmp ugt i32 [[INC_PEEL20]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL24]], label [[IF_THEN_PEEL25:%.*]], label [[FOR_INC_PEEL26:%.*]] -; CHECK: if.then.peel25: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL26]] -; CHECK: for.inc.peel26: -; CHECK-NEXT: [[INC_PEEL27:%.*]] = add nsw i32 [[INC_PEEL20]], 1 -; CHECK-NEXT: [[CMP_PEEL28:%.*]] = icmp slt i32 [[INC_PEEL27]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL28]], label [[FOR_BODY_PEEL_NEXT22:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next22: -; CHECK-NEXT: br label [[FOR_BODY_PEEL30:%.*]] -; CHECK: for.body.peel30: -; CHECK-NEXT: [[CMP1_PEEL31:%.*]] = icmp ugt i32 [[INC_PEEL27]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL31]], label [[IF_THEN_PEEL32:%.*]], label [[FOR_INC_PEEL33:%.*]] -; CHECK: if.then.peel32: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL33]] -; CHECK: for.inc.peel33: -; CHECK-NEXT: [[INC_PEEL34:%.*]] = add nsw i32 [[INC_PEEL27]], 1 -; CHECK-NEXT: [[CMP_PEEL35:%.*]] = icmp slt i32 [[INC_PEEL34]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL35]], label [[FOR_BODY_PEEL_NEXT29:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next29: -; CHECK-NEXT: br label [[FOR_BODY_PEEL37:%.*]] -; CHECK: for.body.peel37: -; CHECK-NEXT: [[CMP1_PEEL38:%.*]] = icmp ugt i32 [[INC_PEEL34]], 9999 -; CHECK-NEXT: br i1 [[CMP1_PEEL38]], label [[IF_THEN_PEEL39:%.*]], label [[FOR_INC_PEEL40:%.*]] -; CHECK: if.then.peel39: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL40]] -; CHECK: for.inc.peel40: -; CHECK-NEXT: [[INC_PEEL41:%.*]] = add nsw i32 [[INC_PEEL34]], 1 -; CHECK-NEXT: [[CMP_PEEL42:%.*]] = icmp slt i32 [[INC_PEEL41]], [[K]] -; CHECK-NEXT: br i1 [[CMP_PEEL42]], label [[FOR_BODY_PEEL_NEXT36:%.*]], label [[FOR_END]] -; CHECK: for.body.peel.next36: -; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT43:%.*]] -; CHECK: for.body.peel.next43: -; CHECK-NEXT: br label [[FOR_BODY_LR_PH_PEEL_NEWPH:%.*]] -; CHECK: for.body.lr.ph.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL41]], [[FOR_BODY_LR_PH_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[I_05]], 9999 ; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; CHECK: if.then: ; CHECK-NEXT: call void @f1() ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !4 -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -535,36 +444,18 @@ ret void } +; In this test, the condition involves 2 AddRecs. Without evaluating both +; AddRecs, we cannot prove that the condition becomes known in the loop body +; after peeling. define void @test6(i32 %k) { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] -; CHECK: for.body.peel.begin: -; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] -; CHECK: for.body.peel: -; CHECK-NEXT: [[CMP1_PEEL:%.*]] = icmp ult i32 0, 4 -; CHECK-NEXT: br i1 [[CMP1_PEEL]], label [[IF_THEN_PEEL:%.*]], label [[IF_ELSE_PEEL:%.*]] -; CHECK: if.else.peel: -; CHECK-NEXT: call void @f2() -; CHECK-NEXT: br label [[FOR_INC_PEEL:%.*]] -; CHECK: if.then.peel: -; CHECK-NEXT: call void @f1() -; CHECK-NEXT: br label [[FOR_INC_PEEL]] -; CHECK: for.inc.peel: -; CHECK-NEXT: [[INC_PEEL:%.*]] = add nsw i32 0, 2 -; CHECK-NEXT: [[J_INC_PEEL:%.*]] = add nsw i32 4, 1 -; CHECK-NEXT: [[CMP_PEEL:%.*]] = icmp slt i32 [[INC_PEEL]], [[K:%.*]] -; CHECK-NEXT: br i1 [[CMP_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.peel.next: -; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT1:%.*]] -; CHECK: for.body.peel.next1: -; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] -; CHECK: entry.peel.newph: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[INC_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC]], [[FOR_INC]] ] -; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[J_INC:%.*]], [[FOR_INC]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], [[J]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: call void @f1() ; CHECK-NEXT: br label [[FOR_INC]] @@ -572,11 +463,10 @@ ; CHECK-NEXT: call void @f2() ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 2 -; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]], !llvm.loop !5 -; CHECK: for.end.loopexit: -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 2 +; CHECK-NEXT: [[J_INC]] = add nsw i32 [[J]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -585,7 +475,7 @@ for.body: %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] - %j = phi i32 [ 4, %entry ], [ %inc, %for.inc ] + %j = phi i32 [ 4, %entry ], [ %j.inc, %for.inc ] %cmp1 = icmp ult i32 %i.05, %j br i1 %cmp1, label %if.then, label %if.else @@ -606,3 +496,126 @@ for.end: ret void } + +define void @test7(i32 %k) { +; FIXME: Could simplify loop body by peeling one additional iteration after +; i != 3 becomes false +; CHECK-LABEL: @test7( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[I_05]], 3 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ne i32 %i.05, 3 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +define void @test8(i32 %k) { +; FIXME: Could simplify loop body by peeling one additional iteration after +; i == 3 becomes true. +; CHECK-LABEL: @test8( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I_05]], 3 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp eq i32 %i.05, 3 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +} + +; Comparison with non-monotonic predicate due to possible wrapping, loop +; body cannot be simplified. +define void @test9(i32 %k) { +; CHECK-LABEL: @test9( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 [[I_05]], 3 +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp slt i32 %i.05, 3 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +}