Index: llvm/include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -66,11 +66,9 @@ struct UnrollLoopOptions { unsigned Count; - unsigned TripCount; bool Force; - bool AllowRuntime; + bool Runtime; bool AllowExpensiveTripCount; - unsigned TripMultiple; bool UnrollRemainder; bool ForgetAllSCEV; }; Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1192,6 +1192,13 @@ return LoopUnrollResult::Unmodified; } + // At this point, UP.Runtime indicates that run-time unrolling is allowed. + // However, we only actually want to perform it if we don't know the trip + // count and the unroll count doesn't divide the known trip multiple. + // TODO: This decision should probably be pushed up into + // computeUnrollCount(). + UP.Runtime &= TripCount == 0 && TripMultiple % UP.Count != 0; + // Save loop properties before it is transformed. MDNode *OrigLoopID = L->getLoopID(); @@ -1199,8 +1206,8 @@ Loop *RemainderLoop = nullptr; LoopUnrollResult UnrollResult = UnrollLoop( L, - {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - TripMultiple, UP.UnrollRemainder, ForgetAllSCEV}, + {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, + UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -244,19 +244,10 @@ /// branch instruction.
However, if the trip count (and multiple) are not known, /// loop unrolling will mostly produce more code that is no faster. /// -/// TripCount is an upper bound on the number of times the loop header runs. -/// Note that the trip count does not need to be exact, it can be any upper -/// bound on the true trip count. -/// -/// Similarly, TripMultiple divides the number of times that the LatchBlock may -/// execute without exiting the loop. -/// -/// If AllowRuntime is true then UnrollLoop will consider unrolling loops that -/// have a runtime (i.e. not compile time constant) trip count. Unrolling these -/// loops require a unroll "prologue" that runs "RuntimeTripCount % Count" -/// iterations before branching into the unrolled loop. UnrollLoop will not -/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and -/// AllowExpensiveTripCount is false. +/// If Runtime is true then UnrollLoop will try to insert a prologue or +/// epilogue that ensures the latch has a trip multiple of Count. UnrollLoop +/// will not runtime-unroll the loop if computing the run-time trip count will +/// be expensive and AllowExpensiveTripCount is false. /// /// The LoopInfo Analysis that is passed will be kept consistent. /// @@ -296,20 +287,7 @@ return LoopUnrollResult::Unmodified; } - if (ULO.TripCount != 0) - LLVM_DEBUG(dbgs() << " Trip Count = " << ULO.TripCount << "\n"); - if (ULO.TripMultiple != 1) - LLVM_DEBUG(dbgs() << " Trip Multiple = " << ULO.TripMultiple << "\n"); - - // Don't enter the unroll code if there is nothing to do. - if (ULO.TripCount == 0 && ULO.Count < 2) { - LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); - return LoopUnrollResult::Unmodified; - } - assert(ULO.Count > 0); - assert(ULO.TripMultiple > 0); - assert(ULO.TripCount == 0 || ULO.TripCount % ULO.TripMultiple == 0); // All these values should be taken only after peeling because they might have // changed. 
@@ -366,11 +344,10 @@ const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; - // We assume a run-time trip count if the compiler cannot - // figure out the loop trip count and the unroll-runtime - // flag is specified. - bool RuntimeTripCount = !CompletelyUnroll && ULO.TripCount == 0 && - ULO.TripMultiple % ULO.Count != 0 && ULO.AllowRuntime; + // There's no point in performing runtime unrolling if this unroll count + // results in a full unroll. + if (CompletelyUnroll) + ULO.Runtime = false; // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which @@ -400,8 +377,9 @@ return LoopUnrollResult::Unmodified; } - // Loops containing convergent instructions must have a count that divides - // their TripMultiple. + // Loops containing convergent instructions cannot use runtime unrolling, + // as the prologue/epilogue may add additional control-dependencies to + // convergent operations. LLVM_DEBUG( { bool HasConvergent = false; @@ -409,22 +387,21 @@ for (auto &I : *BB) if (auto *CB = dyn_cast(&I)) HasConvergent |= CB->isConvergent(); - assert((!HasConvergent || ULO.TripMultiple % ULO.Count == 0) && - "Unroll count must divide trip multiple if loop contains a " - "convergent operation."); + assert((!HasConvergent || !ULO.Runtime) && + "Can't runtime unroll if loop contains a convergent operation."); }); bool EpilogProfitability = UnrollRuntimeEpilog.getNumOccurrences() ? 
UnrollRuntimeEpilog : isEpilogProfitable(L); - if (RuntimeTripCount && + if (ULO.Runtime && !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, PreserveLCSSA, RemainderLoop)) { if (ULO.Force) - RuntimeTripCount = false; + ULO.Runtime = false; else { LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " "generated when assuming runtime trip count\n"); @@ -447,7 +424,7 @@ } else { LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count); - if (RuntimeTripCount) + if (ULO.Runtime) LLVM_DEBUG(dbgs() << " with run-time trip count"); LLVM_DEBUG(dbgs() << "!\n"); @@ -456,7 +433,7 @@ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), L->getHeader()); Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); - if (RuntimeTripCount) + if (ULO.Runtime) Diag << " with run-time trip count"; return Diag; }); @@ -715,7 +692,7 @@ return None; } - if (RuntimeTripCount) { + if (ULO.Runtime) { // If runtime unrolling inserts a prologue, information about non-latch // exits may be stale. 
if (IsLatch && j != 0) Index: llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -248,7 +248,7 @@ bool CompletelyUnroll = (Count == TripCount); // We use the runtime remainder in cases where we don't know trip multiple - if (TripMultiple == 1 || TripMultiple % Count != 0) { + if (TripMultiple % Count != 0) { if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, /*UseEpilogRemainder*/ true, UnrollRemainder, /*ForgetAllSCEV*/ false, Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -984,9 +984,8 @@ LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); UnrollResult = UnrollLoop(remainderLoop, - {/*Count*/ Count - 1, /*TripCount*/ Count - 1, - /*Force*/ false, /*AllowRuntime*/ false, - /*AllowExpensiveTripCount*/ false, /*TripMultiple*/ 1, + {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false, + /*AllowExpensiveTripCount*/ false, /*UnrollRemainder*/ false, ForgetAllSCEV}, LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); }