Index: llvm/lib/Transforms/Utils/LoopPeel.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopPeel.cpp +++ llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -468,8 +468,12 @@ << *EstimatedTripCount << "\n"); if (*EstimatedTripCount) { - if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount) { - unsigned PeelCount = *EstimatedTripCount; + // If the budget cannot cover all estimated iterations, still peel as + // many as fit, provided estimate + AlreadyPeeled <= 2 * MaxPeelCount. + if (*EstimatedTripCount + AlreadyPeeled <= MaxPeelCount * 2 && + AlreadyPeeled < MaxPeelCount) { + unsigned PeelCount = + std::min(*EstimatedTripCount, MaxPeelCount - AlreadyPeeled); LLVM_DEBUG(dbgs() << "Peeling first " << PeelCount << " iterations.\n"); PP.PeelCount = PeelCount; return; Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll @@ -7,8 +7,8 @@ ; Check that we can peel off iterations that make conditions true. ; The second invocation of loop-unroll will NOT do profile based peeling of -; remained iterations because the total number of peeled iterations exceeds -; threashold specified with -unroll-peel-max-count=7. +; remaining iterations because the total number of iterations exceeds the +; doubled threshold specified with -unroll-peel-max-count=7. define void @test2(i32 %k) !prof !4 { ; CHECK: Loop Unroll: F[test2] Loop %for.body ; CHECK: PEELING loop %for.body with iteration count 2! 
@@ -39,5 +39,5 @@ } !1 = distinct !{!1} -!3 = !{!"branch_weights", i32 8, i32 1} +!3 = !{!"branch_weights", i32 14, i32 1} !4 = !{!"function_entry_count", i64 1} Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-3.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-3.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. +; The second invocation of loop-unroll will do profile-based peeling of remaining +; iterations and will partially peel them so that the total number of peeled +; iterations does not exceed the threshold specified with -unroll-peel-max-count=7. +define void @test2(i32 %k) !prof !4 { +; CHECK: Loop Unroll: F[test2] Loop %for.body +; CHECK: PEELING loop %for.body with iteration count 2! +; CHECK: PEELING loop %for.body with iteration count 5! +; CHECK: llvm.loop.unroll.disable +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !3 + +for.end: + ret void +} + +!1 = distinct !{!1} +!3 = !{!"branch_weights", i32 13, i32 1} +!4 = !{!"function_entry_count", i64 1}