diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -753,6 +753,179 @@ } }; +bool unrollingLogic(bool FullOrPartial, Loop *L, const TargetTransformInfo &TTI, + DominatorTree &DT, ScalarEvolution &SE, + const SmallPtrSetImpl &EphValues, + OptimizationRemarkEmitter *ORE, bool MaxOrZero, + unsigned LoopSize, unsigned &TripMultiple, + unsigned &TripCount, unsigned &MaxTripCount, + UnrollCostEstimator UCE, + TargetTransformInfo::UnrollingPreferences &UP, + TargetTransformInfo::PeelingPreferences &PP, + bool &UseUpperBound) { + + bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; + + bool PragmaFullUnroll = hasUnrollFullPragma(L); + unsigned PragmaCount = unrollCountPragmaValue(L); + bool PragmaEnableUnroll = hasUnrollEnablePragma(L); + bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || + PragmaEnableUnroll || UserUnrollCount; + + if (PragmaFullUnroll) + ORE->emit([&]() { + return OptimizationRemarkMissed( + DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll(full) " + "pragma " + "because loop has a runtime trip count."; + }); + + if (FullOrPartial) { + + // Using unroll pragma + if (UnrollCount.getNumOccurrences() > 0) { + UP.Count = UnrollCount; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold) + return true; + } + + if (PragmaCount > 0) { + UP.Count = PragmaCount; + UP.Runtime = true; + UP.AllowExpensiveTripCount = true; + UP.Force = true; + if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && + UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) + return true; + } + + if (PragmaFullUnroll && TripCount != 0) { + UP.Count = TripCount; + if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) + return false; + } + + if (ExplicitUnroll && TripCount != 0) { + // If the loop has an unrolling pragma, we want to be more aggressive with + // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold + // value which is larger than the default limits. + UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); + UP.PartialThreshold = + std::max(UP.PartialThreshold, PragmaUnrollThreshold); + } + + unsigned FullUnrollMaxTripCount = MaxTripCount; + if (!(UP.UpperBound || MaxOrZero) || + FullUnrollMaxTripCount > UnrollMaxUpperBound) + FullUnrollMaxTripCount = 0; + + // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only + // compute the former when the latter is zero. + unsigned ExactTripCount = TripCount; + assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && + "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); + + unsigned FullUnrollTripCount = + ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; + UP.Count = FullUnrollTripCount; + if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { + // When computing the unrolled size, note that BEInsns are not replicated + // like the rest of the loop body. + if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + TripCount = FullUnrollTripCount; + TripMultiple = UP.UpperBound ? 1 : TripMultiple; + return ExplicitUnroll; + } else { + // The loop isn't that small, but we still can fully unroll it if that + // helps to remove a significant number of instructions. + // To check that, run additional analysis on the loop. + if (Optional Cost = analyzeLoopUnrollCost( + L, FullUnrollTripCount, DT, SE, EphValues, TTI, + UP.Threshold * UP.MaxPercentThresholdBoost / 100, + UP.MaxIterationsCountToAnalyze)) { + unsigned Boost = + getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); + if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { + UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); + TripCount = FullUnrollTripCount; + TripMultiple = UP.UpperBound ? 1 : TripMultiple; + return ExplicitUnroll; + } + } + } + } + } else { + if (TripCount) { + UP.Partial |= ExplicitUnroll; + if (!UP.Partial) { + LLVM_DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); + UP.Count = 0; + return false; + } + if (UP.Count == 0) + UP.Count = TripCount; + if (UP.PartialThreshold != NoThreshold) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) + UP.Count = + (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / + (LoopSize - UP.BEInsns); + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + while (UP.Count != 0 && TripCount % UP.Count != 0) + UP.Count--; + if (UP.AllowRemainder && UP.Count <= 1) { + // If there is no Count that is modulo of TripCount, set Count to + // largest power-of-two factor that satisfies the threshold limit. + // As we'll create fixup loop, do the type of unrolling only if + // remainder loop is allowed. + UP.Count = UP.DefaultUnrollRuntimeCount; + while (UP.Count != 0 && + UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) + UP.Count >>= 1; + } + if (UP.Count < 2) { + if (PragmaEnableUnroll) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "UnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to unroll loop as directed by unroll(enable) " + "pragma " + "because unrolled size is too large."; + }); + UP.Count = 0; + } + } else { + UP.Count = TripCount; + } + if (UP.Count > UP.MaxCount) + UP.Count = UP.MaxCount; + if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && + UP.Count != TripCount) + ORE->emit([&]() { + return OptimizationRemarkMissed(DEBUG_TYPE, + "FullUnrollAsDirectedTooLarge", + L->getStartLoc(), L->getHeader()) + << "Unable to fully unroll loop as directed by unroll pragma " + "because " + "unrolled size is too large."; + }); + LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count + << "\n"); + return ExplicitUnroll; + } + } + UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; + + return ExplicitUnroll; +} // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. // Unless IgnoreUser is true, will also use metadata and command-line options @@ -761,6 +934,7 @@ // FIXME: This function is used by LoopUnroll and LoopUnrollAndJam, but consumes // many LoopUnroll-specific options. The shared functionality should be // refactored into it own function. + bool llvm::computeUnrollCount( Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const SmallPtrSetImpl &EphValues, @@ -783,104 +957,13 @@ return true; } - // Check for explicit Count. - // 1st priority is unroll count set by "unroll-count" option. - bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0; - if (UserUnrollCount) { - UP.Count = UnrollCount; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold) - return true; - } + bool tryFullUnroll = true; - // 2nd priority is unroll count set by pragma. - unsigned PragmaCount = unrollCountPragmaValue(L); - if (PragmaCount > 0) { - UP.Count = PragmaCount; - UP.Runtime = true; - UP.AllowExpensiveTripCount = true; - UP.Force = true; - if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) && - UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return true; - } - bool PragmaFullUnroll = hasUnrollFullPragma(L); - if (PragmaFullUnroll && TripCount != 0) { - UP.Count = TripCount; - if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold) - return false; - } + bool ExplicitUnroll = false; - bool PragmaEnableUnroll = hasUnrollEnablePragma(L); - bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll || - PragmaEnableUnroll || UserUnrollCount; - - if (ExplicitUnroll && TripCount != 0) { - // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold - // value which is larger than the default limits. - UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); - UP.PartialThreshold = - std::max(UP.PartialThreshold, PragmaUnrollThreshold); - } - - // 3rd priority is full unroll count. - // Full unroll makes sense only when TripCount or its upper bound could be - // statically calculated. - // Also we need to check if we exceed FullUnrollMaxCount. - // If using the upper bound to unroll, TripMultiple should be set to 1 because - // we do not know when loop may exit. - - // We can unroll by the upper bound amount if it's generally allowed or if - // we know that the loop is executed either the upper bound or zero times. - // (MaxOrZero unrolling keeps only the first loop test, so the number of - // loop tests remains the same compared to the non-unrolled version, whereas - // the generic upper bound unrolling keeps all but the last loop test so the - // number of loop tests goes up which may end up being worse on targets with - // constrained branch predictor resources so is controlled by an option.) - // In addition we only unroll small upper bounds. - unsigned FullUnrollMaxTripCount = MaxTripCount; - if (!(UP.UpperBound || MaxOrZero) || - FullUnrollMaxTripCount > UnrollMaxUpperBound) - FullUnrollMaxTripCount = 0; - - // UnrollByMaxCount and ExactTripCount cannot both be non zero since we only - // compute the former when the latter is zero. - unsigned ExactTripCount = TripCount; - assert((ExactTripCount == 0 || FullUnrollMaxTripCount == 0) && - "ExtractTripCount and UnrollByMaxCount cannot both be non zero."); - - unsigned FullUnrollTripCount = - ExactTripCount ? ExactTripCount : FullUnrollMaxTripCount; - UP.Count = FullUnrollTripCount; - if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) { - // When computing the unrolled size, note that BEInsns are not replicated - // like the rest of the loop body. - if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - TripCount = FullUnrollTripCount; - TripMultiple = UP.UpperBound ? 1 : TripMultiple; - return ExplicitUnroll; - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. - if (Optional Cost = analyzeLoopUnrollCost( - L, FullUnrollTripCount, DT, SE, EphValues, TTI, - UP.Threshold * UP.MaxPercentThresholdBoost / 100, - UP.MaxIterationsCountToAnalyze)) { - unsigned Boost = - getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost); - if (Cost->UnrolledCost < UP.Threshold * Boost / 100) { - UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount); - TripCount = FullUnrollTripCount; - TripMultiple = UP.UpperBound ? 1 : TripMultiple; - return ExplicitUnroll; - } - } - } - } + ExplicitUnroll = unrollingLogic(tryFullUnroll, L, TTI, DT, SE, EphValues, ORE, + MaxOrZero, LoopSize, TripMultiple, TripCount, + MaxTripCount, UCE, UP, PP, UseUpperBound); // 4th priority is loop peeling. computePeelCount(L, LoopSize, PP, TripCount, SE, UP.Threshold); @@ -889,81 +972,14 @@ UP.Count = 1; return ExplicitUnroll; } + tryFullUnroll = false; + + ExplicitUnroll = unrollingLogic(tryFullUnroll, L, TTI, DT, SE, EphValues, ORE, + MaxOrZero, LoopSize, TripMultiple, TripCount, + MaxTripCount, UCE, UP, PP, UseUpperBound); - // 5th priority is partial unrolling. - // Try partial unroll only when TripCount could be statically calculated. - if (TripCount) { - UP.Partial |= ExplicitUnroll; - if (!UP.Partial) { - LLVM_DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - UP.Count = 0; - return false; - } - if (UP.Count == 0) - UP.Count = TripCount; - if (UP.PartialThreshold != NoThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. - if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count = - (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) / - (LoopSize - UP.BEInsns); - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - while (UP.Count != 0 && TripCount % UP.Count != 0) - UP.Count--; - if (UP.AllowRemainder && UP.Count <= 1) { - // If there is no Count that is modulo of TripCount, set Count to - // largest power-of-two factor that satisfies the threshold limit. - // As we'll create fixup loop, do the type of unrolling only if - // remainder loop is allowed. - UP.Count = UP.DefaultUnrollRuntimeCount; - while (UP.Count != 0 && - UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold) - UP.Count >>= 1; - } - if (UP.Count < 2) { - if (PragmaEnableUnroll) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "UnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to unroll loop as directed by unroll(enable) " - "pragma " - "because unrolled size is too large."; - }); - UP.Count = 0; - } - } else { - UP.Count = TripCount; - } - if (UP.Count > UP.MaxCount) - UP.Count = UP.MaxCount; - if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - UP.Count != TripCount) - ORE->emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "FullUnrollAsDirectedTooLarge", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll pragma " - "because " - "unrolled size is too large."; - }); - LLVM_DEBUG(dbgs() << " partially unrolling with count: " << UP.Count - << "\n"); - return ExplicitUnroll; - } assert(TripCount == 0 && "All cases when TripCount is constant should be covered here."); - if (PragmaFullUnroll) - ORE->emit([&]() { - return OptimizationRemarkMissed( - DEBUG_TYPE, "CantFullUnrollAsDirectedRuntimeTripCount", - L->getStartLoc(), L->getHeader()) - << "Unable to fully unroll loop as directed by unroll(full) " - "pragma " - "because loop has a runtime trip count."; - }); // 6th priority is runtime unrolling. // Don't unroll a runtime trip count loop when it is disabled. @@ -989,7 +1005,8 @@ } // Reduce count based on the type of unrolling and the threshold values. - UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; + // UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount; + if (!UP.Runtime) { LLVM_DEBUG( dbgs() << " will not try to unroll loop with runtime trip count " @@ -1023,7 +1040,7 @@ using namespace ore; - if (PragmaCount > 0 && !UP.AllowRemainder) + if (unrollCountPragmaValue(L) > 0 && !UP.AllowRemainder) ORE->emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "DifferentUnrollCountFromDirected",