Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -475,6 +475,11 @@ bool UnrollRemainder; /// Allow unroll and jam. Used to enable unroll and jam for the target. bool UnrollAndJam; + /// Allow peeling based on profile. Used to enable peeling off all + /// iterations based on the provided profile. + /// If the value is true, the peeling cost model can decide to peel only + /// some iterations, and in this case it will set this to false. + bool PeelProfiledIterations; /// Threshold for unroll and jam, for inner loop size. The 'Threshold' /// value above is used during unroll and jam for the outer loop size. /// This value is used in the same manner to limit the size of the inner Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -202,6 +202,7 @@ UP.UpperBound = false; UP.AllowPeeling = true; UP.UnrollAndJam = false; + UP.PeelProfiledIterations = true; UP.UnrollAndJamInnerLoopThreshold = 60; // Override with any target specific settings @@ -1139,7 +1140,7 @@ // If the loop was peeled, we already "used up" the profile information // we had, so we don't want to unroll or peel again. 
if (UnrollResult != LoopUnrollResult::FullyUnrolled && - (IsCountSetExplicitly || UP.PeelCount)) + (IsCountSetExplicitly || (UP.PeelProfiledIterations && UP.PeelCount))) L->setLoopAlreadyUnrolled(); return UnrollResult; Index: llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -65,6 +65,8 @@ "unroll-peel-multi-deopt-exit", cl::init(true), cl::Hidden, cl::desc("Allow peeling of loops with multiple deopt exits.")); +static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; + // Designates that a Phi is estimated to become invariant after an "infinite" // number of loop iterations (i.e. only may become an invariant if the loop is // fully unrolled). @@ -275,6 +277,7 @@ LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount << " iterations.\n"); UP.PeelCount = UnrollForcePeelCount; + UP.PeelProfiledIterations = true; return; } @@ -282,6 +285,13 @@ if (!UP.AllowPeeling) return; + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + // Stop if we already peeled off the maximum number of iterations. + if (AlreadyPeeled >= UnrollPeelMaxCount) + return; + // Here we try to get rid of Phis which become invariants after 1, 2, ..., N // iterations of the loop. For this we compute the number for iterations after // which every Phi is guaranteed to become an invariant, and try to peel the @@ -320,8 +330,11 @@ LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" << " some Phis into invariants.\n"); - UP.PeelCount = DesiredPeelCount; - return; + if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { + UP.PeelCount = DesiredPeelCount; + UP.PeelProfiledIterations = false; + return; + } } } @@ -330,6 +343,9 @@ if (TripCount) return; + // Do not apply profile-based peeling if it is disabled. 
+ if (!UP.PeelProfiledIterations) + return; // If we don't know the trip count, but have reason to believe the average // trip count is low, peeling should be beneficial, since we will usually // hit the peeled section. @@ -344,7 +360,7 @@ << "\n"); if (*PeelCount) { - if ((*PeelCount <= UnrollPeelMaxCount) && + if ((*PeelCount + AlreadyPeeled <= UnrollPeelMaxCount) && (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); @@ -352,6 +368,7 @@ return; } LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); + LLVM_DEBUG(dbgs() << "Already peel count: " << AlreadyPeeled << "\n"); LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); @@ -719,5 +736,11 @@ NumPeeled++; + // Update Metadata for count of peeled off iterations. + unsigned AlreadyPeeled = 0; + if (auto Peeled = getOptionalIntLoopAttribute(L, PeeledCountMetaData)) + AlreadyPeeled = *Peeled; + addStringMetadataToLoop(L, PeeledCountMetaData, AlreadyPeeled + PeelCount); + return true; } Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-1.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. +; The second invocation of loop-unroll will do profile-based peeling of +; the remaining iterations. +define void @test1(i32 %k) !prof !4 { +; CHECK: Loop Unroll: F[test1] Loop %for.body +; CHECK: PEELING loop %for.body with iteration count 2! 
+; CHECK: PEELING loop %for.body with iteration count 4! +; CHECK: llvm.loop.unroll.disable +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !2 + +for.end: + ret void +} + +!1 = distinct !{!1} +!2 = !{!"branch_weights", i32 6, i32 1} +!4 = !{!"function_entry_count", i64 1} Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions-pgo-2.ll @@ -0,0 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -loop-unroll -loop-unroll -verify-dom-info -debug-only=loop-unroll -unroll-peel-max-count=7 2>&1 | FileCheck %s +; REQUIRES: asserts + +declare void @f1() +declare void @f2() + +; Check that we can peel off iterations that make conditions true. +; The second invocation of loop-unroll will NOT do profile-based peeling of +; the remaining iterations because the total number of peeled iterations exceeds +; the threshold specified with -unroll-peel-max-count=7. +define void @test2(i32 %k) !prof !4 { +; CHECK: Loop Unroll: F[test2] Loop %for.body +; CHECK: PEELING loop %for.body with iteration count 2! 
+; CHECK-NOT: llvm.loop.unroll.disable +for.body.lr.ph: + br label %for.body + +for.body: + %i.05 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] + %cmp1 = icmp ult i32 %i.05, 2 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + call void @f1() + br label %for.inc + +if.else: + call void @f2() + br label %for.inc + +for.inc: + %inc = add nsw i32 %i.05, 1 + %cmp = icmp slt i32 %inc, %k + br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1, !prof !3 + +for.end: + ret void +} + +!1 = distinct !{!1} +!3 = !{!"branch_weights", i32 8, i32 1} +!4 = !{!"function_entry_count", i64 1} Index: llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll +++ llvm/test/Transforms/LoopUnroll/peel-loop-conditions.ll @@ -643,3 +643,4 @@ for.end: ret void } +; CHECK-NOT: llvm.loop.unroll.disable \ No newline at end of file