Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -71,7 +71,8 @@ void computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount, ScalarEvolution &SE); + unsigned &TripCount, ScalarEvolution &SE, + DominatorTree &DT); bool canPeel(Loop *L); Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -796,7 +796,7 @@ } // 4th priority is loop peeling - computePeelCount(L, LoopSize, UP, TripCount, SE); + computePeelCount(L, LoopSize, UP, TripCount, SE, DT); if (UP.PeelCount) { UP.Runtime = false; UP.Count = 1; Index: lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollPeel.cpp +++ lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -148,18 +148,22 @@ // else // .. // } +// It only considers conditions in blocks that are executed on every iteration. static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, - ScalarEvolution &SE) { + ScalarEvolution &SE, + DominatorTree &DT) { assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); unsigned DesiredPeelCount = 0; + BasicBlock *LoopLatch = L.getLoopLatch(); for (auto *BB : L.blocks()) { auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); if (!BI || BI->isUnconditional()) continue; - // Ignore loop exit condition. - if (L.getLoopLatch() == BB) + // Ignore loop exit condition and blocks that are not executed on every + // iteration. + if (LoopLatch == BB || !DT.dominates(BB, LoopLatch)) continue; Value *Condition = BI->getCondition(); @@ -219,7 +223,8 @@ // Return the number of iterations we want to peel off. 
void llvm::computePeelCount(Loop *L, unsigned LoopSize, TargetTransformInfo::UnrollingPreferences &UP, - unsigned &TripCount, ScalarEvolution &SE) { + unsigned &TripCount, ScalarEvolution &SE, + DominatorTree &DT) { assert(LoopSize > 0 && "Zero loop size is not allowed!"); // Save the UP.PeelCount value set by the target in // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. @@ -273,8 +278,8 @@ unsigned MaxPeelCount = UnrollPeelMaxCount; MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); - DesiredPeelCount = std::max(DesiredPeelCount, - countToEliminateCompares(*L, MaxPeelCount, SE)); + DesiredPeelCount = std::max( + DesiredPeelCount, countToEliminateCompares(*L, MaxPeelCount, SE, DT)); if (DesiredPeelCount > 0) { DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount); Index: test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -606,3 +606,49 @@ for.end: ret void } + +define void @test11(i32 %k) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: for.body.lr.ph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[I_05]], [[K:%.*]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ugt i32 [[I_05]], 2 +; CHECK-NEXT: br i1 [[CMP2]], label [[INNER_THEN:%.*]], label [[FOR_INC]] +; CHECK: inner.then: +; CHECK-NEXT: call void @f1() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add nsw i32 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[K]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +for.body.lr.ph: + br label %for.body 
+ +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, %k + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %cmp2 = icmp ugt i32 %i.05, 2 + br i1 %cmp2, label %inner.then, label %for.inc + +inner.then: + call void @f1() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end + +for.end: + ret void +}