Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -280,9 +280,14 @@ /// loop body even when the number of loop iterations is not known at /// compile time). bool Runtime; + /// Allow generation of a loop remainder (extra iterations after unroll). + bool AllowRemainder; /// Allow emitting expensive instructions (such as divisions) when computing /// the trip count of a loop for runtime unrolling. bool AllowExpensiveTripCount; + /// Apply loop unroll on any kind of loop + /// (mainly to loops that fail runtime unrolling). + bool Force; }; /// \brief Get target-customized preferences for the generic loop unrolling Index: include/llvm/Transforms/Utils/UnrollLoop.h =================================================================== --- include/llvm/Transforms/Utils/UnrollLoop.h +++ include/llvm/Transforms/Utils/UnrollLoop.h @@ -29,10 +29,10 @@ class Pass; class ScalarEvolution; -bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, - bool AllowExpensiveTripCount, unsigned TripMultiple, - LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, bool PreserveLCSSA); +bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, + bool AllowRuntime, bool AllowExpensiveTripCount, + unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA); bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count, bool AllowExpensiveTripCount, Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -80,6 +80,11 @@ cl::desc("Allows loops to be partially unrolled until " "-unroll-threshold loop size is reached.")); +static cl::opt UnrollAllowRemainder( + 
"unroll-allow-remainder", cl::Hidden, + cl::desc("Allow generation of a loop remainder (extra iterations) " + "when unrolling a loop.")); + static cl::opt UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden, cl::desc("Unroll loops with run-time trip counts")); @@ -99,12 +104,11 @@ static const unsigned DefaultUnrollRuntimeCount = 8; /// Gather the various unrolling parameters based on the defaults, compiler -/// flags, TTI overrides, pragmas, and user specified parameters. +/// flags, TTI overrides and user specified parameters. static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences( Loop *L, const TargetTransformInfo &TTI, Optional UserThreshold, Optional UserCount, Optional UserAllowPartial, - Optional UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll, - bool PragmaEnableUnroll, unsigned TripCount) { + Optional UserRuntime) { TargetTransformInfo::UnrollingPreferences UP; // Set up the defaults @@ -119,7 +123,9 @@ UP.FullUnrollMaxCount = UINT_MAX; UP.Partial = false; UP.Runtime = false; + UP.AllowRemainder = true; UP.AllowExpensiveTripCount = false; + UP.Force = false; // Override with any target specific settings TTI.getUnrollingPreferences(L, UP); @@ -130,12 +136,6 @@ UP.PartialThreshold = UP.PartialOptSizeThreshold; } - // Apply unroll count pragmas - if (PragmaCount) - UP.Count = PragmaCount; - else if (PragmaFullUnroll) - UP.Count = TripCount; - // Apply any user values specified by cl::opt if (UnrollThreshold.getNumOccurrences() > 0) { UP.Threshold = UnrollThreshold; @@ -146,14 +146,14 @@ UnrollPercentDynamicCostSavedThreshold; if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0) UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; - if (UnrollCount.getNumOccurrences() > 0) - UP.Count = UnrollCount; if (UnrollMaxCount.getNumOccurrences() > 0) UP.MaxCount = UnrollMaxCount; if (UnrollFullMaxCount.getNumOccurrences() > 0) UP.FullUnrollMaxCount = UnrollFullMaxCount; if 
(UnrollAllowPartial.getNumOccurrences() > 0) UP.Partial = UnrollAllowPartial; + if (UnrollAllowRemainder.getNumOccurrences() > 0) + UP.AllowRemainder = UnrollAllowRemainder; if (UnrollRuntime.getNumOccurrences() > 0) UP.Runtime = UnrollRuntime; @@ -169,18 +169,6 @@ if (UserRuntime.hasValue()) UP.Runtime = *UserRuntime; - if (PragmaCount > 0 || - ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) { - // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaTheshold value - // which is larger than the default limits. - if (UP.Threshold != NoThreshold) - UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); - if (UP.PartialThreshold != NoThreshold) - UP.PartialThreshold = - std::max(UP.PartialThreshold, PragmaUnrollThreshold); - } - return UP; } @@ -536,84 +524,73 @@ return false; } -static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE, const TargetTransformInfo &TTI, - AssumptionCache &AC, bool PreserveLCSSA, - Optional ProvidedCount, - Optional ProvidedThreshold, - Optional ProvidedAllowPartial, - Optional ProvidedRuntime) { - BasicBlock *Header = L->getHeader(); - DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"); +// Returns true if unroll count was set explicitly. +// Calculates unroll count and writes it to UP.Count. +static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, + DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE, unsigned TripCount, + unsigned TripMultiple, unsigned LoopSize, + TargetTransformInfo::UnrollingPreferences &UP) { + // Check for explicit Count. + // 1st priority is unroll count set by "unroll-count" option. 
+ bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; + if (UserUnrollCount) { + UP.Count = UnrollCount; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if (UP.AllowRemainder && + (LoopSize - 2) * UP.Count + 2 < UP.Threshold) + return true; + } - if (HasUnrollDisablePragma(L)) { - return false; + // 2nd priority is unroll count set by pragma. + unsigned PragmaCount = UnrollCountPragmaValue(L); + if (PragmaCount > 0) { + UP.Count = PragmaCount; + UP.Runtime = true; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if (UP.AllowRemainder && + (LoopSize - 2) * UP.Count + 2 < PragmaUnrollThreshold) + return true; } bool PragmaFullUnroll = HasUnrollFullPragma(L); - bool PragmaEnableUnroll = HasUnrollEnablePragma(L); - unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; - - // Find trip count and trip multiple if count is not available - unsigned TripCount = 0; - unsigned TripMultiple = 1; - // If there are multiple exiting blocks but one of them is the latch, use the - // latch for the trip count estimation. Otherwise insist on a single exiting - // block for the trip count estimation. 
- BasicBlock *ExitingBlock = L->getLoopLatch(); - if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) - ExitingBlock = L->getExitingBlock(); - if (ExitingBlock) { - TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + if (PragmaFullUnroll && TripCount != 0) { + UP.Count = TripCount; + if ((LoopSize - 2) * UP.Count + 2 < PragmaUnrollThreshold) + return false; } - TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( - L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, - ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll, - TripCount); + bool PragmaEnableUnroll = HasUnrollEnablePragma(L); + bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || PragmaEnableUnroll || + UserUnrollCount; - unsigned Count = UP.Count; - bool CountSetExplicitly = Count != 0; - // Use a heuristic count if we didn't set anything explicitly. - if (!CountSetExplicitly) - Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount; - if (TripCount && Count > TripCount) - Count = TripCount; - Count = std::min(Count, UP.FullUnrollMaxCount); + uint64_t UnrolledSize; + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = L->getHeader()->getParent(); + LLVMContext &Ctx = F->getContext(); - unsigned NumInlineCandidates; - bool NotDuplicatable; - bool Convergent; - unsigned LoopSize = ApproximateLoopSize( - L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); - DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); - - // When computing the unrolled size, note that the conditional branch on the - // backedge and the comparison feeding it are not replicated like the rest of - // the loop body (which is why 2 is subtracted). 
- uint64_t UnrolledSize = (uint64_t)(LoopSize - 2) * Count + 2; - if (NotDuplicatable) { - DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" - << " instructions.\n"); - return false; - } - if (NumInlineCandidates != 0) { - DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); - return false; + if (ExplicitUnroll && TripCount != 0) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold value + // which is larger than the default limits. + UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); + UP.PartialThreshold = + std::max(UP.PartialThreshold, PragmaUnrollThreshold); } - // Given Count, TripCount and thresholds determine the type of - // unrolling which is to be performed. - enum { Full = 0, Partial = 1, Runtime = 2 }; - int Unrolling; - if (TripCount && Count == TripCount) { - Unrolling = Partial; - // If the loop is really small, we don't need to run an expensive analysis. + // 3rd priority is full unroll count. + // Full unroll makes sense only when TripCount could be statically calculated. + // Also we need to check if we exceed FullUnrollMaxCount. + if (TripCount && TripCount <= UP.FullUnrollMaxCount ) { + // When computing the unrolled size, note that the conditional branch on the + // backedge and the comparison feeding it are not replicated like the rest of + // the loop body (which is why 2 is subtracted). + UnrolledSize = (uint64_t)(LoopSize - 2) * TripCount + 2; if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount, UnrolledSize, UnrolledSize)) { - Unrolling = Full; + UP.Count = TripCount; + return ExplicitUnroll; } else { // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions.
@@ -625,147 +602,209 @@ UP.PercentDynamicCostSavedThreshold, UP.DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) { - Unrolling = Full; + UP.Count = TripCount; + return ExplicitUnroll; } } - } else if (TripCount && Count < TripCount) { - Unrolling = Partial; - } else { - Unrolling = Runtime; } - // Reduce count based on the type of unrolling and the threshold values. - unsigned OriginalCount = Count; - bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime; - // Don't unroll a runtime trip count loop with unroll full pragma. - if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { - AllowRuntime = false; - } - bool DecreasedCountDueToConvergence = false; - if (Unrolling == Partial) { - bool AllowPartial = PragmaEnableUnroll || UP.Partial; - if (!AllowPartial && !CountSetExplicitly) { + // 4th priority is partial unrolling. + // Try partial unroll only when TripCount could be statically calculated. + if (TripCount) { + if (UP.Count == 0) + UP.Count = TripCount; + UP.Partial |= ExplicitUnroll; + if (!UP.Partial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); + UP.Count = 0; return false; } - if (UP.PartialThreshold != NoThreshold && Count > 1) { + if (UP.PartialThreshold != NoThreshold) { // Reduce unroll count to be modulo of TripCount for partial unrolling.
+ UnrolledSize = (uint64_t)(LoopSize - 2) * UP.Count + 2; if (UnrolledSize > UP.PartialThreshold) - Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); - if (Count > UP.MaxCount) - Count = UP.MaxCount; - while (Count != 0 && TripCount % Count != 0) - Count--; - if (AllowRuntime && Count <= 1) { + UP.Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + while (UP.Count != 0 && TripCount % UP.Count != 0) + UP.Count--; + if (UP.AllowRemainder && UP.Count <= 1) { // If there is no Count that is modulo of TripCount, set Count to // largest power-of-two factor that satisfies the threshold limit. // As we'll create fixup loop, do the type of unrolling only if - // runtime unrolling is allowed. - Count = DefaultUnrollRuntimeCount; - UnrolledSize = (LoopSize - 2) * Count + 2; - while (Count != 0 && UnrolledSize > UP.PartialThreshold) { - Count >>= 1; - UnrolledSize = (LoopSize - 2) * Count + 2; + // remainder loop is allowed. + UP.Count = DefaultUnrollRuntimeCount; + UnrolledSize = (LoopSize - 2) * UP.Count + 2; + while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + UP.Count >>= 1; + UnrolledSize = (LoopSize - 2) * UP.Count + 2; } } + if (UP.Count < 2) { + if (PragmaEnableUnroll) + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop as directed by unroll(enable) pragma " + "because unrolled size is too large."); + UP.Count = 0; + } + } else { + UP.Count = TripCount; } - } else if (Unrolling == Runtime) { - if (!AllowRuntime && !CountSetExplicitly) { - DEBUG(dbgs() << " will not try to unroll loop with runtime trip count " - << "-unroll-runtime not given\n"); - return false; - } - - // Reduce unroll count to be the largest power-of-two factor of - // the original count which satisfies the threshold limit. 
- while (Count != 0 && UnrolledSize > UP.PartialThreshold) { - Count >>= 1; - UnrolledSize = (LoopSize - 2) * Count + 2; - } - - if (Count > UP.MaxCount) - Count = UP.MaxCount; - - // If the loop contains a convergent operation, the prelude we'd add - // to do the first few instructions before we hit the unrolled loop - // is unsafe -- it adds a control-flow dependency to the convergent - // operation. Therefore Count must divide TripMultiple. - // - // TODO: This is quite conservative. In practice, convergent_op() - // is likely to be called unconditionally in the loop. In this - // case, the program would be ill-formed (on most architectures) - // unless n were the same on all threads in a thread group. - // Assuming n is the same on all threads, any kind of unrolling is - // safe. But currently llvm's notion of convergence isn't powerful - // enough to express this. - unsigned OrigCount = Count; - while (Convergent && Count != 0 && TripMultiple % Count != 0) { - DecreasedCountDueToConvergence = true; - Count >>= 1; - } - if (OrigCount > Count) { - DEBUG(dbgs() << " loop contains a convergent instruction, so unroll " - "count must divide the trip multiple, " - << TripMultiple << ". Reducing unroll count from " - << OrigCount << " to " << Count << ".\n"); - } - DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); - } - - if (HasPragma) { - // Emit optimization remarks if we are unable to unroll the loop - // as directed by a pragma. - DebugLoc LoopLoc = L->getStartLoc(); - Function *F = Header->getParent(); - LLVMContext &Ctx = F->getContext(); - if (PragmaCount > 0 && DecreasedCountDueToConvergence) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - Twine("Unable to unroll loop the number of times directed by " - "unroll_count pragma because the loop contains a convergent " - "instruction, and so must have an unroll count that divides " - "the loop trip multiple of ") + - Twine(TripMultiple) + ". 
Unrolling instead " + Twine(Count) + - " time(s)."); - } else if ((PragmaCount > 0) && Count != OriginalCount) { + if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + UP.Count != TripCount) emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to unroll loop the number of times directed by " - "unroll_count pragma because unrolled size is too large."); - } else if (PragmaFullUnroll && !TripCount) { + "Unable to fully unroll loop as directed by unroll pragma because " + "unrolled size is too large."); + return ExplicitUnroll; + } + assert(TripCount == 0 && + "All cases when TripCount is constant should be covered here."); + if (PragmaFullUnroll) emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, "Unable to fully unroll loop as directed by unroll(full) pragma " "because loop has a runtime trip count."); - } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to unroll loop as directed by unroll(enable) pragma because " - "unrolled size is too large."); - } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - Count != TripCount) { + + // 5th priority is runtime unrolling. + // Don't unroll a runtime trip count loop when it is disabled. + if (HasRuntimeUnrollDisablePragma(L)) { + UP.Count = 0; + return false; + } + // Reduce count based on the type of unrolling and the threshold values. + UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; + if (!UP.Runtime) { + DEBUG(dbgs() << " will not try to unroll loop with runtime trip count " + << "-unroll-runtime not given\n"); + UP.Count = 0; + return false; + } + if (UP.Count == 0) + UP.Count = DefaultUnrollRuntimeCount; + UnrolledSize = (LoopSize - 2) * UP.Count + 2; + + // Reduce unroll count to be the largest power-of-two factor of + // the original count which satisfies the threshold limit. 
+ while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) { + UP.Count >>= 1; + UnrolledSize = (LoopSize - 2) * UP.Count + 2; + } + + unsigned OrigCount = UP.Count; + + if (!UP.AllowRemainder && UP.Count != 0 && (TripMultiple % UP.Count) != 0) { + while (UP.Count != 0 && TripMultiple % UP.Count != 0) + UP.Count >>= 1; + DEBUG(dbgs() << "Remainder loop is restricted (that could architecture " + "specific or because the loop contains a convergent " + "instruction), so unroll count must divide the trip " + "multiple, " + << TripMultiple << ". Reducing unroll count from " + << OrigCount << " to " << UP.Count << ".\n"); + if (PragmaCount > 0 && !UP.AllowRemainder) emitOptimizationRemarkMissed( Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll pragma because " - "unrolled size is too large."); - } + Twine("Unable to unroll loop the number of times directed by " + "unroll_count pragma because remainder loop is restricted " + "(that could architecture specific or because the loop " + "contains a convergent instruction) and so must have an unroll " + "count that divides the loop trip multiple of ") + + Twine(TripMultiple) + ". Unrolling instead " + Twine(UP.Count) + + " time(s)."); + } - if (Unrolling != Full && Count < 2) { - // Partial unrolling by 1 is a nop. For full unrolling, a factor - // of 1 makes sense because loop control can be eliminated. 
+ if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n"); + if (UP.Count < 2) + UP.Count = 0; + return ExplicitUnroll; +} + +static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + AssumptionCache &AC, bool PreserveLCSSA, + Optional ProvidedCount, + Optional ProvidedThreshold, + Optional ProvidedAllowPartial, + Optional ProvidedRuntime) { + BasicBlock *Header = L->getHeader(); + DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() + << "] Loop %" << Header->getName() << "\n"); + if (HasUnrollDisablePragma(L)) { return false; } + unsigned NumInlineCandidates; + bool NotDuplicatable; + bool Convergent; + unsigned LoopSize = ApproximateLoopSize( + L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC); + DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (NotDuplicatable) { + DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" + << " instructions.\n"); + return false; + } + if (NumInlineCandidates != 0) { + DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + return false; + } + + // Find trip count and trip multiple if count is not available + unsigned TripCount = 0; + unsigned TripMultiple = 1; + // If there are multiple exiting blocks but one of them is the latch, use the + // latch for the trip count estimation. Otherwise insist on a single exiting + // block for the trip count estimation. 
+ BasicBlock *ExitingBlock = L->getLoopLatch(); + if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) + ExitingBlock = L->getExitingBlock(); + if (ExitingBlock) { + TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); + TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); + } + + TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences( + L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial, + ProvidedRuntime); + + // If the loop contains a convergent operation, the prelude we'd add + // to do the first few instructions before we hit the unrolled loop + // is unsafe -- it adds a control-flow dependency to the convergent + // operation. Therefore restrict remainder loop (try unrolling without). + // + // TODO: This is quite conservative. In practice, convergent_op() + // is likely to be called unconditionally in the loop. In this + // case, the program would be ill-formed (on most architectures) + // unless n were the same on all threads in a thread group. + // Assuming n is the same on all threads, any kind of unrolling is + // safe. But currently llvm's notion of convergence isn't powerful + // enough to express this. + if (Convergent) + UP.AllowRemainder = false; + + bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount, + TripMultiple, LoopSize, UP); + if (!UP.Count) + return false; + // Unroll factor (Count) must be less than or equal to TripCount. + if (TripCount && UP.Count > TripCount) + UP.Count = TripCount; + // Unroll the loop. - if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, - TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA)) + if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime, + UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC, + PreserveLCSSA)) return false; - // If loop has an unroll count pragma mark loop as unrolled to prevent - // unrolling beyond that requested by the pragma.
- if (HasPragma && PragmaCount != 0) + // If loop has an unroll count pragma or unrolled by explicitly set count + // mark loop as unrolled to prevent unrolling beyond that requested. + if (IsCountSetExplicitly) SetLoopAlreadyUnrolled(L); return true; } Index: lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- lib/Transforms/Utils/LoopUnroll.cpp +++ lib/Transforms/Utils/LoopUnroll.cpp @@ -199,7 +199,7 @@ /// /// This utility preserves LoopInfo. It will also preserve ScalarEvolution and /// DominatorTree if they are non-null. -bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, +bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force, bool AllowRuntime, bool AllowExpensiveTripCount, unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, @@ -300,8 +300,12 @@ if (RuntimeTripCount && TripMultiple % Count != 0 && !UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount, UnrollRuntimeEpilog, LI, SE, DT, - PreserveLCSSA)) - return false; + PreserveLCSSA)) { + if (Force) + RuntimeTripCount = false; + else + return false; + } // Notify ScalarEvolution that the loop will be substantially changed, // if not outright eliminated. Index: test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll =================================================================== --- test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll +++ test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime | FileCheck %s +; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-allow-remainder | FileCheck %s ; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold. ; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling.