diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include @@ -320,16 +319,6 @@ unsigned RolledDynamicCost; }; -struct PragmaInfo { - PragmaInfo(bool UUC, bool PFU, unsigned PC, bool PEU) - : UserUnrollCount(UUC), PragmaFullUnroll(PFU), PragmaCount(PC), - PragmaEnableUnroll(PEU) {} - const bool UserUnrollCount; - const bool PragmaFullUnroll; - const unsigned PragmaCount; - const bool PragmaEnableUnroll; -}; - } // end anonymous namespace /// Figure out if the loop is worth full unrolling. @@ -758,132 +747,13 @@ // Returns loop size estimation for unrolled loop, given the unrolling // configuration specified by UP. - uint64_t - getUnrolledLoopSize(const TargetTransformInfo::UnrollingPreferences &UP, - const unsigned CountOverwrite = 0) const { + uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) { assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!"); - if (CountOverwrite) - return static_cast(LoopSize - UP.BEInsns) * CountOverwrite + - UP.BEInsns; - else - return static_cast(LoopSize - UP.BEInsns) * UP.Count + - UP.BEInsns; + return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns; } }; -static Optional -shouldPragmaUnroll(Loop *L, const PragmaInfo &PInfo, - const unsigned TripMultiple, const unsigned TripCount, - const UnrollCostEstimator UCE, - const TargetTransformInfo::UnrollingPreferences &UP) { - - // Using unroll pragma - // 1st priority is unroll count set by "unroll-count" option. - - if (PInfo.UserUnrollCount) { - if (UP.AllowRemainder && - UCE.getUnrolledLoopSize(UP, (unsigned)UnrollCount) < UP.Threshold) - return (unsigned)UnrollCount; - } - - // 2nd priority is unroll count set by pragma. - if (PInfo.PragmaCount > 0) { - if ((UP.AllowRemainder || (TripMultiple % PInfo.PragmaCount == 0)) && - UCE.getUnrolledLoopSize(UP, PInfo.PragmaCount) < PragmaUnrollThreshold) - return PInfo.PragmaCount; - } - - if (PInfo.PragmaFullUnroll && TripCount != 0) { - if (UCE.getUnrolledLoopSize(UP, TripCount) < PragmaUnrollThreshold) - return TripCount; - } - // if didn't return until here, should continue to other priorties - return None; -} - -static Optional shouldFullUnroll( - Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, - ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, - const unsigned FullUnrollTripCount, const UnrollCostEstimator UCE, - const TargetTransformInfo::UnrollingPreferences &UP) { - - if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { - // When computing the unrolled size, note that BEInsns are not replicated - // like the rest of the loop body. - if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { - return FullUnrollTripCount; - - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. - if (Optional Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100, - UP.MaxIterationsCountToAnalyze)) { - unsigned Boost = - getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); - if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - return FullUnrollTripCount; - } - } - } - } - return None; -} - -static Optional -shouldPartialUnroll(const unsigned LoopSize, const unsigned TripCount, - const UnrollCostEstimator UCE, - const TargetTransformInfo::UnrollingPreferences &UP) { - - unsigned count = UP.Count; - if (TripCount) { - if (!UP.Partial) { - LLVM_DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - count = 0; - return count; - } - if (count == 0) - count = TripCount; - if (UP.PartialThreshold != NoThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. - if (UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) - count = (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / - (LoopSize - UP.BEInsns); - if (count > UP.MaxCount) - count = UP.MaxCount; - while (count != 0 && TripCount % count != 0) - count--; - if (UP.AllowRemainder && count <= 1) { - // If there is no Count that is modulo of TripCount, set Count to - // largest power-of-two factor that satisfies the threshold limit. - // As we'll create fixup loop, do the type of unrolling only if - // remainder loop is allowed. - count = UP.DefaultUnrollRuntimeCount; - while (count != 0 && - UCE.getUnrolledLoopSize(UP, count) > UP.PartialThreshold) - count >>= 1; - } - if (count < 2) { - count = 0; - } - } else { - count = TripCount; - } - if (count > UP.MaxCount) - count = UP.MaxCount; - - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << count << "\n"); - - return count; - } - - // if didn't return until here, should continue to other priorties - return None; -} // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. // Unless IgnoreUser is true, will also use metadata and command-line options @@ -901,18 +771,7 @@ TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) { UnrollCostEstimator UCE(*L, LoopSize); - Optional UnrollFactor; - - const bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; - const bool PragmaFullUnroll = hasUnrollFullPragma(L); - const unsigned PragmaCount = unrollCountPragmaValue(L); - const bool PragmaEnableUnroll = hasUnrollEnablePragma(L); - - const bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || - PragmaEnableUnroll || UserUnrollCount; - PragmaInfo PInfo(UserUnrollCount, PragmaFullUnroll, PragmaCount, - PragmaEnableUnroll); // Use an explicit peel count that has been specified for testing. In this // case it's not permitted to also specify an explicit unroll count. if (PP.PeelCount) { @@ -924,29 +783,47 @@ UP.Runtime = false; return true; } + // Check for explicit Count. // 1st priority is unroll count set by "unroll-count" option. - // 2nd priority is unroll count set by pragma. - UnrollFactor = shouldPragmaUnroll(L, PInfo, TripMultiple, TripCount, UCE, UP); + bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; + if (UserUnrollCount) { + UP.Count = UnrollCount; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold) + return true; + } - if (UnrollFactor) { - UP.Count = *UnrollFactor; + // 2nd priority is unroll count set by pragma. + unsigned PragmaCount = unrollCountPragmaValue(L); + if (PragmaCount > 0) { + UP.Count = PragmaCount; + UP.Runtime = true; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && + UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) + return true; + } + bool PragmaFullUnroll = hasUnrollFullPragma(L); + if (PragmaFullUnroll && TripCount != 0) { + UP.Count = TripCount; + if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) + return false; + } - if (UserUnrollCount || (PragmaCount > 0)) { - UP.AllowExpensiveTripCount = true; - UP.Force = true; - } - UP.Runtime |= (PragmaCount > 0); - return ExplicitUnroll; - } else { - if (ExplicitUnroll && TripCount != 0) { - // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold - // value which is larger than the default limits. - UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); - UP.PartialThreshold = - std::max(UP.PartialThreshold, PragmaUnrollThreshold); - } + bool PragmaEnableUnroll = hasUnrollEnablePragma(L); + bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || + PragmaEnableUnroll || UserUnrollCount; + + if (ExplicitUnroll && TripCount != 0) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold + // value which is larger than the default limits. + UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); + UP.PartialThreshold = + std::max(UP.PartialThreshold, PragmaUnrollThreshold); } // 3rd priority is full unroll count. @@ -976,20 +853,28 @@ unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; UP.Count = FullUnrollTripCount; - - UnrollFactor = - shouldFullUnroll(L, TTI, DT, SE, EphValues, FullUnrollTripCount, UCE, UP); - - // if shouldFullUnroll can do the unrolling, some side parameteres should be - // set - if (UnrollFactor) { - UP.Count = *UnrollFactor; - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - TripCount = FullUnrollTripCount; - TripMultiple = UP.UpperBound ? 1 : TripMultiple; - return ExplicitUnroll; - } else { - UP.Count = FullUnrollTripCount; + if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { + // When computing the unrolled size, note that BEInsns are not replicated + // like the rest of the loop body. + if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + return ExplicitUnroll; + } else { + // The loop isn't that small, but we still can fully unroll it if that + // helps to remove a significant number of instructions. + // To check that, run additional analysis on the loop. + if (Optional Cost = analyzeLoopUnrollCost( + L, FullUnrollTripCount, DT, SE, EphValues, TTI, + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { + unsigned Boost = + getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); + if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + return ExplicitUnroll; + } + } + } } // 4th priority is loop peeling. @@ -1000,31 +885,39 @@ return ExplicitUnroll; } - // Before starting partial unrolling, set up.partial to true, - // if user explicitly asked for unrolling - if (TripCount) - UP.Partial |= ExplicitUnroll; - // 5th priority is partial unrolling. // Try partial unroll only when TripCount could be statically calculated. - UnrollFactor = shouldPartialUnroll(LoopSize, TripCount, UCE, UP); - - if (UnrollFactor) { - UP.Count = *UnrollFactor; - - if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - UP.Count != TripCount) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma " - "because " - "unrolled size is too large."; - }); - + if (TripCount) { + UP.Partial |= ExplicitUnroll; + if (!UP.Partial) { + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + UP.Count = 0; + return false; + } + if (UP.Count == 0) + UP.Count = TripCount; if (UP.PartialThreshold != NoThreshold) { - if (UP.Count == 0) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) + UP.Count = + (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + while (UP.Count != 0 && TripCount % UP.Count != 0) + UP.Count--; + if (UP.AllowRemainder && UP.Count <= 1) { + // If there is no Count that is modulo of TripCount, set Count to + // largest power-of-two factor that satisfies the threshold limit. + // As we'll create fixup loop, do the type of unrolling only if + // remainder loop is allowed. + UP.Count = UP.DefaultUnrollRuntimeCount; + while (UP.Count != 0 && + UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) + UP.Count >>= 1; + } + if (UP.Count < 2) { if (PragmaEnableUnroll) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, @@ -1034,8 +927,25 @@ "pragma " "because unrolled size is too large."; }); + UP.Count = 0; } + } else { + UP.Count = TripCount; } + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + UP.Count != TripCount) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); return ExplicitUnroll; } assert(TripCount == 0 && @@ -1072,6 +982,8 @@ UP.AllowExpensiveTripCount = true; } } + + // Reduce count based on the type of unrolling and the threshold values. UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; if (!UP.Runtime) { LLVM_DEBUG( @@ -1106,7 +1018,7 @@ using namespace ore; - if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) + if (PragmaCount > 0 && !UP.AllowRemainder) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "DifferentUnrollCountFromDirected",