Index: include/llvm/IR/DiagnosticInfo.h =================================================================== --- include/llvm/IR/DiagnosticInfo.h +++ include/llvm/IR/DiagnosticInfo.h @@ -559,6 +559,11 @@ /// diagnostic is generated. \p Msg is the message string to use. void emitLoopInterleaveWarning(LLVMContext &Ctx, const Function &Fn, const DebugLoc &DLoc, const Twine &Msg); +/// Emit a warning when loop unrolling is specified but fails. \p Fn is the +/// function triggering the warning, \p DLoc is the debug location where the +/// diagnostic is generated. \p Msg is the message string to use. +void emitLoopUnrollingWarning(LLVMContext &Ctx, const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg); } // End namespace llvm Index: lib/IR/DiagnosticInfo.cpp =================================================================== --- lib/IR/DiagnosticInfo.cpp +++ lib/IR/DiagnosticInfo.cpp @@ -233,3 +233,12 @@ Ctx.diagnose(DiagnosticInfoOptimizationFailure( Fn, DLoc, Twine("loop not interleaved: " + Msg))); } + +void llvm::emitLoopUnrollingWarning(LLVMContext &Ctx, const Function &Fn, + const DebugLoc &DLoc, const Twine &Msg) { + // Unlike the vectorize and interleave warnings, do not emit a prefix with the + // unrolling warning message because a warning can be emitted in two + // different cases: no unrolling was performed, or the loop was unrolled less + // than a pragma indicated. 
+ Ctx.diagnose(DiagnosticInfoOptimizationFailure(Fn, DLoc, Msg)); +} Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -36,7 +36,8 @@ using namespace llvm; -#define DEBUG_TYPE "loop-unroll" +#define LU_NAME "loop-unroll" +#define DEBUG_TYPE LU_NAME static cl::opt UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, @@ -78,161 +79,147 @@ "unroll_count pragma.")); namespace { - class LoopUnroll : public LoopPass { +class LoopDescription; +class UnrollLimits; + +// Set of unrolling parameters and preferences which don't change through the +// pass's lifetime. +class UnrollParameters { +public: + // Encapsulates a single loop unrolling parameter which may be set by flag + // or via pass constructor parameters. + template class Parameter { public: - static char ID; // Pass ID, replacement for typeid - LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) { - CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); - CurrentPercentDynamicCostSavedThreshold = - UnrollPercentDynamicCostSavedThreshold; - CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount; - CurrentCount = (C == -1) ? UnrollCount : unsigned(C); - CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; - CurrentRuntime = (R == -1) ? 
UnrollRuntime : (bool)R; - - UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); - UserPercentDynamicCostSavedThreshold = - (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0); - UserDynamicCostSavingsDiscount = - (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0); - UserAllowPartial = (P != -1) || - (UnrollAllowPartial.getNumOccurrences() > 0); - UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0); - UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0); + T Value; + bool IsSetByUser; + Parameter(cl::opt &CommandLineOption, int ArgumentValue = -1) { + if (ArgumentValue == -1) { + Value = CommandLineOption; + IsSetByUser = CommandLineOption.getNumOccurrences() > 0; + } else { + Value = (T)ArgumentValue; + IsSetByUser = true; + } + }; + }; - initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); - } + UnrollParameters(int Threshold, int Count, int AllowPartial, int AllowRuntime) + : Threshold(UnrollThreshold, Threshold), Count(UnrollCount, Count), + AllowPartialUnrolling(UnrollAllowPartial, AllowPartial), + AllowRuntimeUnrolling(UnrollRuntime, AllowRuntime), + PercentDynamicCostSavedThreshold( + UnrollPercentDynamicCostSavedThreshold), + DynamicCostSavingsDiscount(UnrollDynamicCostSavingsDiscount) {} + + Parameter Threshold; + Parameter Count; + Parameter AllowPartialUnrolling; + Parameter AllowRuntimeUnrolling; + Parameter PercentDynamicCostSavedThreshold; + Parameter DynamicCostSavingsDiscount; +}; - /// A magic value for use with the Threshold parameter to indicate - /// that the loop unroll should be performed regardless of how much - /// code expansion would result. - static const unsigned NoThreshold = UINT_MAX; - - // Threshold to use when optsize is specified (and there is no - // explicit -unroll-threshold). 
- static const unsigned OptSizeUnrollThreshold = 50; - - // Default unroll count for loops with run-time trip count if - // -unroll-count is not set - static const unsigned UnrollRuntimeCount = 8; - - unsigned CurrentCount; - unsigned CurrentThreshold; - unsigned CurrentPercentDynamicCostSavedThreshold; - unsigned CurrentDynamicCostSavingsDiscount; - bool CurrentAllowPartial; - bool CurrentRuntime; - - // Flags for whether the 'current' settings are user-specified. - bool UserCount; - bool UserThreshold; - bool UserPercentDynamicCostSavedThreshold; - bool UserDynamicCostSavingsDiscount; - bool UserAllowPartial; - bool UserRuntime; - - bool runOnLoop(Loop *L, LPPassManager &LPM) override; - - /// This transformation requires natural loop information & requires that - /// loop preheaders be inserted into the CFG... - /// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(LoopSimplifyID); - AU.addPreservedID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addPreservedID(LCSSAID); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. - // If loop unroll does not preserve dom info then LCSSA pass on next - // loop will receive invalid dom info. - // For now, recreate dom info, if loop is unrolled. - AU.addPreserved(); - AU.addPreserved(); - } +class LoopUnroll : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) + : LoopPass(ID), Parameters(T, C, P, R) { + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); + } - // Fill in the UnrollingPreferences parameter with values from the - // TargetTransformationInfo. 
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI, - TargetTransformInfo::UnrollingPreferences &UP) { - UP.Threshold = CurrentThreshold; - UP.PercentDynamicCostSavedThreshold = - CurrentPercentDynamicCostSavedThreshold; - UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount; - UP.OptSizeThreshold = OptSizeUnrollThreshold; - UP.PartialThreshold = CurrentThreshold; - UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; - UP.Count = CurrentCount; - UP.MaxCount = UINT_MAX; - UP.Partial = CurrentAllowPartial; - UP.Runtime = CurrentRuntime; - UP.AllowExpensiveTripCount = false; - TTI.getUnrollingPreferences(L, UP); - } + /// A magic value for use with the Threshold parameter to indicate + /// that the loop unroll should be performed regardless of how much + /// code expansion would result. + static const unsigned NoThreshold = UINT_MAX; + + // Threshold to use when optsize is specified (and there is no + // explicit -unroll-threshold). + static const unsigned OptSizeUnrollThreshold = 50; + + // Default unroll count for loops with run-time trip count if + // -unroll-count is not set + static const unsigned UnrollRuntimeCount = 8; + + bool runOnLoop(Loop *L, LPPassManager &LPM) override; - // Select and return an unroll count based on parameters from - // user, unroll preferences, unroll pragmas, or a heuristic. - // SetExplicitly is set to true if the unroll count is is set by - // the user or a pragma rather than selected heuristically. - unsigned - selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, - const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly); - - // Select threshold values used to limit unrolling based on a - // total unrolled size. Parameters Threshold and PartialThreshold - // are set to the maximum unrolled size for fully and partially - // unrolled loops respectively. 
- void selectThresholds(const Loop *L, bool UsePragmaThreshold, + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... + /// + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(LoopSimplifyID); + AU.addPreservedID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addPreservedID(LCSSAID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info. + // If loop unroll does not preserve dom info then LCSSA pass on next + // loop will receive invalid dom info. + // For now, recreate dom info, if loop is unrolled. + AU.addPreserved(); + AU.addPreserved(); + } + +protected: + AssumptionCache *AC; + DominatorTree *DT; + LoopInfo *LI; + ScalarEvolution *SE; + TargetTransformInfo *TTI; + + UnrollParameters Parameters; + + // Fill in the UnrollingPreferences parameter with values from + // UnrollingParameters and TargetTransformationInfo. + void getUnrollingPreferences(Loop *L, + TargetTransformInfo::UnrollingPreferences &UP) { + UP.Threshold = Parameters.Threshold.Value; + UP.PercentDynamicCostSavedThreshold = + Parameters.PercentDynamicCostSavedThreshold.Value; + UP.DynamicCostSavingsDiscount = Parameters.DynamicCostSavingsDiscount.Value; + UP.OptSizeThreshold = OptSizeUnrollThreshold; + UP.PartialThreshold = Parameters.Threshold.Value; + UP.PartialOptSizeThreshold = OptSizeUnrollThreshold; + UP.Count = Parameters.Count.Value; + UP.MaxCount = UINT_MAX; + UP.Partial = Parameters.AllowPartialUnrolling.Value; + UP.Runtime = Parameters.AllowRuntimeUnrolling.Value; + UP.AllowExpensiveTripCount = false; + TTI->getUnrollingPreferences(L, UP); + } + + // Select an unroll factor for the given loop. 
This factor may come from + // pragmas, arguments passed to the unrolling pass constructor, command-line + // flags, or may be chosen heuristically. The factor is chosen without regard + // to any unrolled size limits (thresholds) which are applied later. + // SetExplicitly is set to true if the returned value is set explicitly + // (by pragma, user, etc) rather than chosen heuristically. + unsigned + selectUnrollCount(const LoopDescription &Desc, + const TargetTransformInfo::UnrollingPreferences &UP, + bool &SetExplicitly); + + // Select threshold values used to limit unrolling based on a total unrolled + // size. + void selectUnrollLimits(const LoopDescription &Desc, const TargetTransformInfo::UnrollingPreferences &UP, - unsigned &Threshold, unsigned &PartialThreshold, - unsigned &PercentDynamicCostSavedThreshold, - unsigned &DynamicCostSavingsDiscount) { - // Determine the current unrolling threshold. While this is - // normally set from UnrollThreshold, it is overridden to a - // smaller value if the current function is marked as - // optimize-for-size, and the unroll threshold was not user - // specified. - Threshold = UserThreshold ? CurrentThreshold : UP.Threshold; - PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold; - PercentDynamicCostSavedThreshold = - UserPercentDynamicCostSavedThreshold - ? CurrentPercentDynamicCostSavedThreshold - : UP.PercentDynamicCostSavedThreshold; - DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount - ? CurrentDynamicCostSavingsDiscount - : UP.DynamicCostSavingsDiscount; - - if (!UserThreshold && - // FIXME: Use Function::optForSize(). - L->getHeader()->getParent()->hasFnAttribute( - Attribute::OptimizeForSize)) { - Threshold = UP.OptSizeThreshold; - PartialThreshold = UP.PartialOptSizeThreshold; - } - if (UsePragmaThreshold) { - // If the loop has an unrolling pragma, we want to be more - // aggressive with unrolling limits. 
Set thresholds to at - // least the PragmaTheshold value which is larger than the - // default limits. - if (Threshold != NoThreshold) - Threshold = std::max(Threshold, PragmaUnrollThreshold); - if (PartialThreshold != NoThreshold) - PartialThreshold = - std::max(PartialThreshold, PragmaUnrollThreshold); - } - } - bool canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, - uint64_t UnrolledCost, uint64_t RolledDynamicCost); - }; + UnrollLimits &Limits); + + // Apply unroll limits to the given unroll factor (Count). Return a reduced + // factor if Count exceeds the given limits, or return Count otherwise. + unsigned applyUnrollLimits(unsigned Count, const LoopDescription &Desc, + const UnrollLimits &Limits); + + // Returns true if loop can be unrolled completely with the given size + // limits. Analysis incorporates dynamic cost savings. + bool canUnrollCompletely(Loop *L, const UnrollLimits &Limits, + uint64_t UnrolledCost, uint64_t RolledDynamicCost); +}; } char LoopUnroll::ID = 0; @@ -256,6 +243,213 @@ } namespace { +// Encapsulates all information about a loop necessary to make the unrolling +// decision. 
+class LoopDescription { +public: + LoopDescription(Loop *L, const TargetTransformInfo &TTI, ScalarEvolution &SE, + AssumptionCache &AC) + : TheLoop(L), F(L->getHeader()->getParent()), DLoc(L->getStartLoc()) { + HasUnrollDisablePragma = hasUnrollMetadata("llvm.loop.unroll.disable"); + HasUnrollEnablePragma = hasUnrollMetadata("llvm.loop.unroll.enable"); + HasUnrollFullPragma = hasUnrollMetadata("llvm.loop.unroll.full"); + HasRuntimeUnrollDisablePragma = + hasUnrollMetadata("llvm.loop.unroll.runtime.disable"); + PragmaUnrollCount = unrollCountPragmaValue(); + unsigned NumInlineCandidates; + bool notDuplicatable; + LoopSize = + approximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AC); + Duplicatable = !notDuplicatable && (NumInlineCandidates == 0); + + // Determine trip count and largest divisor of the trip count (used for + // partial unrolling if trip count cannot be determined). + TripCount = 0; + TripMultiple = 1; + // If there are multiple exiting blocks but one of them is the latch, use + // the latch for the trip count estimation. Otherwise insist on a single + // exiting block for the trip count estimation. + BasicBlock *ExitingBlock = L->getLoopLatch(); + if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) + ExitingBlock = L->getExitingBlock(); + if (ExitingBlock) { + TripCount = SE.getSmallConstantTripCount(L, ExitingBlock); + TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); + } + } + + uint64_t unrolledSize(unsigned Count) const { + assert(LoopSize > 2 && "Loop size should be larger than 2"); + return ((uint64_t)LoopSize - 2) * Count + 2; + } + + // Return the largest unroll factor which just fits within the given + // threshold. + unsigned maxUnrollFactor(unsigned Threshold) const { + assert(LoopSize > 2 && "Loop size should be larger than 2"); + return (std::max(Threshold, 3u) - 2) / (LoopSize - 2); + } + + // Emit optimization analysis remark (shown with -pass-remarks-analysis + // option). 
These remarks provide detail about the loops as they are + // considered for unrolling. + void emitAnalysisRemark(const Twine &Message) const { + // For loops with unroll enabling pragmas we want to always emit the + // diagnostic: when -Rpass-analysis=loop-unroll is used, and when + // -Rpass-analysis alone is used (without an argument). + const char *Name; + if (HasUnrollEnablePragma || HasUnrollFullPragma || PragmaUnrollCount > 0) + Name = DiagnosticInfo::AlwaysPrint; + else + Name = LU_NAME; + + emitOptimizationRemarkAnalysis(F->getContext(), Name, *F, DLoc, Message); + } + + // Emit an optimization analysis remark which includes all information + // about the loop relevant to loop unrolling. + void emitDescriptionRemark() const { + std::string Message; + raw_string_ostream S(Message); + S << "candidate loop for unrolling: loop body size = " << LoopSize; + if (TripCount > 0) + S << ", trip count = " << TripCount; + else if (TripMultiple > 1) + S << ", trip count is multiple of " << TripMultiple; + else + S << ", trip count is unknown"; + + if (!Duplicatable) + S << ", loop body not duplicatable"; + + if (HasUnrollDisablePragma) + S << ", loop has disable unrolling pragma"; + else if (HasUnrollEnablePragma) + S << ", loop has enable unrolling pragma"; + else if (HasUnrollFullPragma) + S << ", loop has full unroll pragma"; + else if (HasRuntimeUnrollDisablePragma) + S << ", loop has disable runtime unrolling pragma"; + else if (PragmaUnrollCount > 0) + S << ", loop has pragma suggesting an unroll factor of " + << PragmaUnrollCount; + emitAnalysisRemark(S.str()); + } + + // Emit optimization missed remark (shown with -pass-remarks-missed + // option). These remarks indicate that unrolling might have been beneficial + // but unrolling could not be performed or the unroll factor was smaller than + // desired. UnrollCount indicates how much unrolling was performed. Zero means + // no unrolling was performed. 
+ void emitMissedRemark(const Twine &Reason, unsigned UnrollCount = 0) const { + std::string Message; + raw_string_ostream S(Message); + if (UnrollCount) + S << "unrolling loop less than directed: "; + else + S << "loop not unrolled: "; + S << Reason; + emitOptimizationRemarkMissed(F->getContext(), LU_NAME, *F, DLoc, S.str()); + // If the loop contains a non-disabling unroll pragma then emit a warning + // that the pragma directive could not be followed. + std::string Warning; + raw_string_ostream W(Warning); + if (HasUnrollEnablePragma) { + assert(!UnrollCount && + "missed remark should not be emitted if unrolling"); + W << "loop has enable unroll pragma but was not unrolled: " << Reason; + } else if (HasUnrollFullPragma) { + assert(!UnrollCount && + "missed remark should not be emitted if unrolling"); + W << "loop has full unroll pragma but was not unrolled: " << Reason; + } else if (PragmaUnrollCount > 0) { + W << "loop could not be unrolled " << PragmaUnrollCount + << " times as directed by pragma, unrolled " << UnrollCount + << " times instead: " << Reason; + } + if (W.str().size() > 0) + emitLoopUnrollingWarning(F->getContext(), *F, DLoc, W.str()); + } + + Loop *TheLoop; + bool HasUnrollDisablePragma; + bool HasUnrollEnablePragma; + bool HasUnrollFullPragma; + bool HasRuntimeUnrollDisablePragma; + unsigned PragmaUnrollCount; + unsigned LoopSize; + bool Duplicatable; + unsigned TripCount; + unsigned TripMultiple; + +protected: + // Full unrolling may eliminate the loop (invalidating TheLoop) so save + // function and location of the loop on LoopDescription construction to enable + // emitting remarks after the loop has been destroyed. 
+ const Function *F; + const DebugLoc DLoc; + + unsigned approximateLoopSize(const Loop *L, unsigned &NumCalls, + bool &NotDuplicatable, + const TargetTransformInfo &TTI, + AssumptionCache &AC) { + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + + CodeMetrics Metrics; + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; + ++I) + Metrics.analyzeBasicBlock(*I, TTI, EphValues); + NumCalls = Metrics.NumInlineCandidates; + NotDuplicatable = Metrics.notDuplicatable; + + unsigned LoopSize = Metrics.NumInsts; + + // Don't allow an estimate of size zero. This would allow unrolling of + // loops with huge iteration counts, which is a compile time problem even if + // it's not a problem for code quality. Also, the code using this size may + // assume that each loop has at least three instructions (likely a + // conditional branch, a comparison feeding that branch, and some kind of + // loop increment feeding that comparison instruction). + LoopSize = std::max(LoopSize, 3u); + + return LoopSize; + } + + MDNode *unrollMetadata(StringRef Name) { + if (MDNode *LoopID = TheLoop->getLoopID()) + return GetUnrollMetadata(LoopID, Name); + return nullptr; + } + + bool hasUnrollMetadata(StringRef Name) { + return unrollMetadata(Name) != nullptr; + } + + unsigned unrollCountPragmaValue() { + if (MDNode *MD = unrollMetadata("llvm.loop.unroll.count")) { + assert(MD->getNumOperands() == 2 && + "Unroll count hint metadata should have two operands."); + unsigned Count = + mdconst::extract(MD->getOperand(1))->getZExtValue(); + assert(Count >= 1 && "Unroll count must be positive."); + return Count; + } + return 0; + } +}; + +// Collection of constraints and maximum limits (thresholds) which may be +// imposed on the unroll factor for a loop. 
+struct UnrollLimits { + bool AllowPartial; + bool AllowRuntime; + unsigned FullThreshold; + unsigned PartialThreshold; + unsigned PercentDynamicCostSavedThreshold; + unsigned DynamicCostSavingsDiscount; +}; + // This class is used to get an estimate of the optimization effects that we // could get from complete loop unrolling. It comes from the fact that some // loads might be replaced with concrete constant values and that could trigger @@ -527,7 +721,7 @@ /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. static Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, +analyzeLoopUnrollCost(const LoopDescription &Desc, DominatorTree &DT, ScalarEvolution &SE, const TargetTransformInfo &TTI, int MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets @@ -537,8 +731,8 @@ "The unroll iterations max is too large!"); // Don't simulate loops with a big or unknown tripcount - if (!UnrollMaxIterationsCountToAnalyze || !TripCount || - TripCount > UnrollMaxIterationsCountToAnalyze) + if (!UnrollMaxIterationsCountToAnalyze || !Desc.TripCount || + Desc.TripCount > UnrollMaxIterationsCountToAnalyze) return None; SmallSetVector BBWorklist; @@ -557,8 +751,9 @@ // Ensure that we don't violate the loop structure invariants relied on by // this analysis. - assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); - assert(L->isLCSSAForm(DT) && + assert(Desc.TheLoop->isLoopSimplifyForm() && + "Must put loop into normal form first."); + assert(Desc.TheLoop->isLCSSAForm(DT) && "Must have loops in LCSSA form to track live-out values."); DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); @@ -567,12 +762,12 @@ // which would be simplified. // Since the same load will take different values on different iterations, // we literally have to go through all loop's iterations. 
- for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { + for (unsigned Iteration = 0; Iteration < Desc.TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); // Prepare for the iteration by collecting any simplified entry or backedge // inputs. - for (Instruction &I : *L->getHeader()) { + for (Instruction &I : *Desc.TheLoop->getHeader()) { auto *PHI = dyn_cast(&I); if (!PHI) break; @@ -584,7 +779,8 @@ "Must have an incoming value only for the preheader and the latch."); Value *V = PHI->getIncomingValueForBlock( - Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); + Iteration == 0 ? Desc.TheLoop->getLoopPreheader() + : Desc.TheLoop->getLoopLatch()); Constant *C = dyn_cast(V); if (Iteration != 0 && !C) C = SimplifiedValues.lookup(V); @@ -597,10 +793,11 @@ while (!SimplifiedInputValues.empty()) SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); - UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, Desc.TheLoop, + SE); BBWorklist.clear(); - BBWorklist.insert(L->getHeader()); + BBWorklist.insert(Desc.TheLoop->getHeader()); // Note that we *must not* cache the size, this loop grows the worklist. for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { BasicBlock *BB = BBWorklist[Idx]; @@ -651,7 +848,7 @@ else Succ = BI->getSuccessor( cast(SimpleCond)->isZero() ? 1 : 0); - if (L->contains(Succ)) + if (Desc.TheLoop->contains(Succ)) BBWorklist.insert(Succ); continue; } @@ -666,7 +863,7 @@ else Succ = SI->findCaseValue(cast(SimpleCond)) .getCaseSuccessor(); - if (L->contains(Succ)) + if (Desc.TheLoop->contains(Succ)) BBWorklist.insert(Succ); continue; } @@ -674,7 +871,7 @@ // Add BB's successors to the worklist. 
for (BasicBlock *Succ : successors(BB)) - if (L->contains(Succ)) + if (Desc.TheLoop->contains(Succ)) BBWorklist.insert(Succ); } @@ -692,79 +889,6 @@ return {{UnrolledCost, RolledDynamicCost}}; } -/// ApproximateLoopSize - Approximate the size of the loop. -static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - bool &NotDuplicatable, - const TargetTransformInfo &TTI, - AssumptionCache *AC) { - SmallPtrSet EphValues; - CodeMetrics::collectEphemeralValues(L, AC, EphValues); - - CodeMetrics Metrics; - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); - I != E; ++I) - Metrics.analyzeBasicBlock(*I, TTI, EphValues); - NumCalls = Metrics.NumInlineCandidates; - NotDuplicatable = Metrics.notDuplicatable; - - unsigned LoopSize = Metrics.NumInsts; - - // Don't allow an estimate of size zero. This would allows unrolling of loops - // with huge iteration counts, which is a compile time problem even if it's - // not a problem for code quality. Also, the code using this size may assume - // that each loop has at least three instructions (likely a conditional - // branch, a comparison feeding that branch, and some kind of loop increment - // feeding that comparison instruction). - LoopSize = std::max(LoopSize, 3u); - - return LoopSize; -} - -// Returns the loop hint metadata node with the given name (for example, -// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is -// returned. -static MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) { - if (MDNode *LoopID = L->getLoopID()) - return GetUnrollMetadata(LoopID, Name); - return nullptr; -} - -// Returns true if the loop has an unroll(full) pragma. -static bool HasUnrollFullPragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full"); -} - -// Returns true if the loop has an unroll(enable) pragma. This metadata is used -// for both "#pragma unroll" and "#pragma clang loop unroll(enable)" directives. 
-static bool HasUnrollEnablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.enable"); -} - -// Returns true if the loop has an unroll(disable) pragma. -static bool HasUnrollDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); -} - -// Returns true if the loop has an runtime unroll(disable) pragma. -static bool HasRuntimeUnrollDisablePragma(const Loop *L) { - return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); -} - -// If loop has an unroll_count pragma return the (necessarily -// positive) value from the pragma. Otherwise return 0. -static unsigned UnrollCountPragmaValue(const Loop *L) { - MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count"); - if (MD) { - assert(MD->getNumOperands() == 2 && - "Unroll count hint metadata should have two operands."); - unsigned Count = - mdconst::extract(MD->getOperand(1))->getZExtValue(); - assert(Count >= 1 && "Unroll count must be positive."); - return Count; - } - return 0; -} - // Remove existing unroll metadata and add unroll disable metadata to // indicate the loop has already been unrolled. 
This prevents a loop // from being unrolled more than is directed by a pragma if the loop @@ -801,20 +925,18 @@ L->setLoopID(NewLoopID); } -bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold, - unsigned PercentDynamicCostSavedThreshold, - unsigned DynamicCostSavingsDiscount, +bool LoopUnroll::canUnrollCompletely(Loop *L, const UnrollLimits &Limits, uint64_t UnrolledCost, uint64_t RolledDynamicCost) { - if (Threshold == NoThreshold) { + if (Limits.FullThreshold == NoThreshold) { DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n"); return true; } - if (UnrolledCost <= Threshold) { + if (UnrolledCost <= Limits.FullThreshold) { DEBUG(dbgs() << " Can fully unroll, because unrolled cost: " - << UnrolledCost << "<" << Threshold << "\n"); + << UnrolledCost << "<" << Limits.FullThreshold << "\n"); return true; } @@ -829,24 +951,25 @@ unsigned PercentDynamicCostSaved = (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost; - if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold && - (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <= - (int64_t)Threshold) { + if (PercentDynamicCostSaved >= Limits.PercentDynamicCostSavedThreshold && + (int64_t)UnrolledCost - (int64_t)Limits.DynamicCostSavingsDiscount <= + (int64_t)Limits.FullThreshold) { DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the " - "expected dynamic cost by " << PercentDynamicCostSaved - << "% (threshold: " << PercentDynamicCostSavedThreshold - << "%)\n" + "expected dynamic cost by " + << PercentDynamicCostSaved << "% (threshold: " + << Limits.PercentDynamicCostSavedThreshold << "%)\n" << " and the unrolled cost (" << UnrolledCost << ") is less than the max threshold (" - << DynamicCostSavingsDiscount << ").\n"); + << Limits.DynamicCostSavingsDiscount << ").\n"); return true; } DEBUG(dbgs() << " Too large to fully unroll:\n"); - DEBUG(dbgs() << " Threshold: " << Threshold << "\n"); - DEBUG(dbgs() << " Max threshold: " << 
DynamicCostSavingsDiscount << "\n"); + DEBUG(dbgs() << " Threshold: " << Limits.FullThreshold << "\n"); + DEBUG(dbgs() << " Max threshold: " << Limits.DynamicCostSavingsDiscount + << "\n"); DEBUG(dbgs() << " Percent cost saved threshold: " - << PercentDynamicCostSavedThreshold << "%\n"); + << Limits.PercentDynamicCostSavedThreshold << "%\n"); DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n"); DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n"); DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved @@ -855,241 +978,290 @@ } unsigned LoopUnroll::selectUnrollCount( - const Loop *L, unsigned TripCount, bool PragmaFullUnroll, - unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP, - bool &SetExplicitly) { - SetExplicitly = true; - + const LoopDescription &Desc, + const TargetTransformInfo::UnrollingPreferences &UP, bool &SetExplicitly) { + unsigned Count = 0; + SetExplicitly = false; // User-specified count (either as a command-line option or // constructor parameter) has highest precedence. - unsigned Count = UserCount ? CurrentCount : 0; + if (Parameters.Count.IsSetByUser && Parameters.Count.Value > 0) { + Count = Parameters.Count.Value; + SetExplicitly = true; + } // If there is no user-specified count, unroll pragmas have the next // highest precedence. - if (Count == 0) { - if (PragmaCount) { - Count = PragmaCount; - } else if (PragmaFullUnroll) { - Count = TripCount; - } + if (Count == 0 && Desc.PragmaUnrollCount > 0) { + Count = Desc.PragmaUnrollCount; + SetExplicitly = true; } - if (Count == 0) + if (Count == 0 && (Desc.HasUnrollEnablePragma || Desc.HasUnrollFullPragma) && + Desc.TripCount > 0) + Count = Desc.TripCount; + + if (Count == 0 && UP.Count > 0) { Count = UP.Count; + SetExplicitly = true; + } if (Count == 0) { - SetExplicitly = false; - if (TripCount == 0) - // Runtime trip count. 
- Count = UnrollRuntimeCount; + if (Desc.TripCount > 0) + // Optimistically try to unroll the loop fully if the trip count is known. + Count = Desc.TripCount; else - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. - Count = TripCount; + // Select unroll factor heuristically. + Count = UnrollRuntimeCount; } - if (TripCount && Count > TripCount) - return TripCount; + + if (Desc.TripCount > 0) + // We can't unroll more than the trip count. + Count = std::min(Desc.TripCount, Count); + + // And we can't unroll more than UP.MaxCount + Count = std::min(UP.MaxCount, Count); + return Count; } +void LoopUnroll::selectUnrollLimits( + const LoopDescription &Desc, + const TargetTransformInfo::UnrollingPreferences &UP, UnrollLimits &Limits) { + // Set all thresholds and values to either the value set by the user (via + // command line opt or pass constructor argument), or if not set by the user + // then the TTI unrolling preferences value. 
+ if (Parameters.AllowPartialUnrolling.IsSetByUser) + Limits.AllowPartial = Parameters.AllowPartialUnrolling.Value; + else + Limits.AllowPartial = UP.Partial; + + if (Parameters.AllowRuntimeUnrolling.IsSetByUser) + Limits.AllowRuntime = Parameters.AllowRuntimeUnrolling.Value; + else + Limits.AllowRuntime = UP.Runtime; + + if (Parameters.Threshold.IsSetByUser) { + Limits.FullThreshold = Parameters.Threshold.Value; + Limits.PartialThreshold = Parameters.Threshold.Value; + } else { + Limits.FullThreshold = UP.Threshold; + Limits.PartialThreshold = UP.PartialThreshold; + } + + if (Parameters.PercentDynamicCostSavedThreshold.IsSetByUser) + Limits.PercentDynamicCostSavedThreshold = + Parameters.PercentDynamicCostSavedThreshold.Value; + else + Limits.PercentDynamicCostSavedThreshold = + UP.PercentDynamicCostSavedThreshold; + + if (Parameters.DynamicCostSavingsDiscount.IsSetByUser) + Limits.DynamicCostSavingsDiscount = + Parameters.DynamicCostSavingsDiscount.Value; + else + Limits.DynamicCostSavingsDiscount = UP.DynamicCostSavingsDiscount; + + // Adjust limits based on special circumstances. + + if (Desc.TheLoop->getHeader()->getParent()->optForSize() && + !Parameters.Threshold.IsSetByUser) { + Limits.FullThreshold = UP.OptSizeThreshold; + Limits.PartialThreshold = UP.OptSizeThreshold; + } + + // Loops with unrolling pragmas should have a high threshold to ensure + // unrolling can be performed as directed. 
+ if (Desc.HasUnrollEnablePragma || Desc.HasUnrollFullPragma || + Desc.PragmaUnrollCount > 0) { + Limits.FullThreshold = + std::max(Limits.FullThreshold, PragmaUnrollThreshold); + Limits.PartialThreshold = + std::max(Limits.PartialThreshold, PragmaUnrollThreshold); + } + + if (Desc.HasUnrollEnablePragma || (Desc.PragmaUnrollCount > 0)) { + Limits.AllowPartial = true; + Limits.AllowRuntime = true; + } + + if (Desc.HasRuntimeUnrollDisablePragma) + Limits.AllowRuntime = false; +} + +unsigned selectPartialUnrollFactor(const LoopDescription &Desc, + unsigned Threshold) { + assert(Desc.TripCount > 0 && + "Trip count must be known for partial unrolling"); + + // Pick unroll count which just fits within the threshold, then reduce to be + // modulo of TripCount for partial unrolling. + unsigned Count = Desc.maxUnrollFactor(Threshold); + while (Count != 0 && Desc.TripCount % Count != 0) + Count--; + return Count; +} + +unsigned LoopUnroll::applyUnrollLimits(unsigned Count, + const LoopDescription &Desc, + const UnrollLimits &Limits) { + assert(Count > 0 && "Expected non-zero unroll factor"); + if (Count == Desc.TripCount) { + // Full unrolling. + bool canFullyUnroll = Desc.unrolledSize(Count) <= Limits.FullThreshold; + if (!canFullyUnroll) { + // The loop isn't that small, but we still can fully unroll it if + // unrolling helps to remove a significant number of instructions. To + // check that, run additional analysis on the loop. + if (Optional Cost = analyzeLoopUnrollCost( + Desc, *DT, *SE, *TTI, + Limits.FullThreshold + Limits.DynamicCostSavingsDiscount)) + canFullyUnroll = canUnrollCompletely( + Desc.TheLoop, Limits, Cost->UnrolledCost, Cost->RolledDynamicCost); + } + + if (canFullyUnroll) + return Count; + else + return selectPartialUnrollFactor(Desc, Limits.PartialThreshold); + } else if (Desc.TripCount > 0) { + // Partial unrolling. 
+ if (Desc.unrolledSize(Count) <= Limits.PartialThreshold) + return Count; + else + return selectPartialUnrollFactor(Desc, Limits.PartialThreshold); + } else { + // Runtime unrolling. Reduce unroll count to be the largest power-of-two + // factor of the original count which satisfies the threshold limit. + while (Count != 0 && Desc.unrolledSize(Count) > Limits.PartialThreshold) + Count >>= 1; + return Count; + } +} + bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (skipOptnoneFunction(L)) return false; Function &F = *L->getHeader()->getParent(); - - auto &DT = getAnalysis().getDomTree(); - LoopInfo *LI = &getAnalysis().getLoopInfo(); - ScalarEvolution *SE = &getAnalysis().getSE(); - const TargetTransformInfo &TTI = - getAnalysis().getTTI(F); - auto &AC = getAnalysis().getAssumptionCache(F); + AC = &getAnalysis().getAssumptionCache(F); + DT = &getAnalysis().getDomTree(); + SE = &getAnalysis().getSE(); + LI = &getAnalysis().getLoopInfo(); + TTI = &getAnalysis().getTTI(F); BasicBlock *Header = L->getHeader(); DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName() - << "] Loop %" << Header->getName() << "\n"); + << "] Loop %" << Header->getName() << "\n"); - if (HasUnrollDisablePragma(L)) { + const LoopDescription Desc(L, *TTI, *SE, *AC); + Desc.emitDescriptionRemark(); + + if (Desc.HasUnrollDisablePragma) { + DEBUG(dbgs() << " Not unrolling: loop has unroll disable pragma.\n"); + Desc.emitAnalysisRemark( + "loop not unrolled: unrolling is explicitly disabled"); return false; } - bool PragmaFullUnroll = HasUnrollFullPragma(L); - bool PragmaEnableUnroll = HasUnrollEnablePragma(L); - unsigned PragmaCount = UnrollCountPragmaValue(L); - bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0; TargetTransformInfo::UnrollingPreferences UP; - getUnrollingPreferences(L, TTI, UP); + getUnrollingPreferences(L, UP); + + // Select initial unroll factor. Also note if the unroll factor is set + // explicitly by the user or pragma. 
In this case, runtime and partial + // unrolling is allowed. + bool countSetExplicitly; + unsigned InitialCount = selectUnrollCount(Desc, UP, countSetExplicitly); + + // Unroll factor of zero or one is a NOP with the exception that unroll by one + // for a loop with a trip count of one is a useful transformation (full + // unrolling) which removes loop control flow. + if (InitialCount == 0 || + (InitialCount == 1 && InitialCount != Desc.TripCount)) { + DEBUG(dbgs() << " No loop unrolling to perform.\n"); + if (Desc.PragmaUnrollCount == 1) + Desc.emitAnalysisRemark( + "loop not unrolled: unrolling with a factor of one does nothing"); + else + Desc.emitAnalysisRemark( + "loop not unrolled: unrolling is not beneficial and not " + "explicitly forced"); + return false; + } - // Find trip count and trip multiple if count is not available - unsigned TripCount = 0; - unsigned TripMultiple = 1; - // If there are multiple exiting blocks but one of them is the latch, use the - // latch for the trip count estimation. Otherwise insist on a single exiting - // block for the trip count estimation. - BasicBlock *ExitingBlock = L->getLoopLatch(); - if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) - ExitingBlock = L->getExitingBlock(); - if (ExitingBlock) { - TripCount = SE->getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock); - } - - // Select an initial unroll count. This may be reduced later based - // on size thresholds. 
- bool CountSetExplicitly; - unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll, - PragmaCount, UP, CountSetExplicitly); - - unsigned NumInlineCandidates; - bool notDuplicatable; - unsigned LoopSize = - ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, &AC); - DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); - - // When computing the unrolled size, note that the conditional branch on the - // backedge and the comparison feeding it are not replicated like the rest of - // the loop body (which is why 2 is subtracted). - uint64_t UnrolledSize = (uint64_t)(LoopSize-2) * Count + 2; - if (notDuplicatable) { + if (!Desc.Duplicatable) { DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" << " instructions.\n"); + Desc.emitMissedRemark("loop contains non-duplicatable instructions"); return false; } - if (NumInlineCandidates != 0) { - DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + + UnrollLimits Limits; + selectUnrollLimits(Desc, UP, Limits); + + unsigned Count = applyUnrollLimits(InitialCount, Desc, Limits); + if (Count == 0 || (Count == 1 && Count != Desc.TripCount)) { + DEBUG(dbgs() << " Loop is too large to unroll\n"); + Desc.emitMissedRemark("loop too large"); return false; } - unsigned Threshold, PartialThreshold; - unsigned PercentDynamicCostSavedThreshold; - unsigned DynamicCostSavingsDiscount; - // Only use the high pragma threshold when we have a target unroll factor such - // as with "#pragma unroll N" or a pragma indicating full unrolling and the - // trip count is known. Otherwise we rely on the standard threshold to - // heuristically select a reasonable unroll count. 
- bool UsePragmaThreshold = - PragmaCount > 0 || - ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0); - - selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold, - PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount); - - // Given Count, TripCount and thresholds determine the type of - // unrolling which is to be performed. enum { Full = 0, Partial = 1, Runtime = 2 }; int Unrolling; - if (TripCount && Count == TripCount) { - Unrolling = Partial; - // If the loop is really small, we don't need to run an expensive analysis. - if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount, - UnrolledSize, UnrolledSize)) { - Unrolling = Full; - } else { - // The loop isn't that small, but we still can fully unroll it if that - // helps to remove a significant number of instructions. - // To check that, run additional analysis on the loop. - if (Optional Cost = - analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, - Threshold + DynamicCostSavingsDiscount)) - if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, - DynamicCostSavingsDiscount, Cost->UnrolledCost, - Cost->RolledDynamicCost)) { - Unrolling = Full; - } - } - } else if (TripCount && Count < TripCount) { + if (Desc.TripCount && Desc.TripCount == Count) + Unrolling = Full; + else if (Desc.TripCount) Unrolling = Partial; - } else { + else Unrolling = Runtime; - } - // Reduce count based on the type of unrolling and the threshold values. - unsigned OriginalCount = Count; - bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || - (UserRuntime ? CurrentRuntime : UP.Runtime); - // Don't unroll a runtime trip count loop with unroll full pragma. - if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) { - AllowRuntime = false; - } - if (Unrolling == Partial) { - bool AllowPartial = PragmaEnableUnroll || - (UserAllowPartial ? 
CurrentAllowPartial : UP.Partial); - if (!AllowPartial && !CountSetExplicitly) { - DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - return false; - } - if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { - // Reduce unroll count to be modulo of TripCount for partial unrolling. - Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2); - while (Count != 0 && TripCount % Count != 0) - Count--; - } - } else if (Unrolling == Runtime) { - if (!AllowRuntime && !CountSetExplicitly) { - DEBUG(dbgs() << " will not try to unroll loop with runtime trip count " - << "-unroll-runtime not given\n"); - return false; - } - // Reduce unroll count to be the largest power-of-two factor of - // the original count which satisfies the threshold limit. - while (Count != 0 && UnrolledSize > PartialThreshold) { - Count >>= 1; - UnrolledSize = (LoopSize-2) * Count + 2; - } - if (Count > UP.MaxCount) - Count = UP.MaxCount; - DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); + if (Desc.HasUnrollFullPragma && (Unrolling != Full)) { + DEBUG(dbgs() << " Won't runtime unroll with unroll full pragma."); + if (Unrolling == Runtime) + Desc.emitMissedRemark("loop has runtime trip count"); + else + // Partial unrolling. + Desc.emitMissedRemark("loop too large and/or trip count too high"); + return false; } - if (HasPragma) { - if (PragmaCount != 0) - // If loop has an unroll count pragma mark loop as unrolled to prevent - // unrolling beyond that requested by the pragma. - SetLoopAlreadyUnrolled(L); - - // Emit optimization remarks if we are unable to unroll the loop - // as directed by a pragma. 
- DebugLoc LoopLoc = L->getStartLoc(); - Function *F = Header->getParent(); - LLVMContext &Ctx = F->getContext(); - if ((PragmaCount > 0) && Count != OriginalCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to unroll loop the number of times directed by " - "unroll_count pragma because unrolled size is too large."); - } else if (PragmaFullUnroll && !TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll(full) pragma " - "because loop has a runtime trip count."); - } else if (PragmaEnableUnroll && Count != TripCount && Count < 2) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to unroll loop as directed by unroll(enable) pragma because " - "unrolled size is too large."); - } else if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount && - Count != TripCount) { - emitOptimizationRemarkMissed( - Ctx, DEBUG_TYPE, *F, LoopLoc, - "Unable to fully unroll loop as directed by unroll pragma because " - "unrolled size is too large."); - } + if (Unrolling == Runtime && !Limits.AllowRuntime && !countSetExplicitly) { + DEBUG(dbgs() << " Runtime unrolling not allowed.\n"); + Desc.emitMissedRemark( + "loop has runtime trip count and -unroll-runtime not given"); + return false; } - if (Unrolling != Full && Count < 2) { - // Partial unrolling by 1 is a nop. For full unrolling, a factor - // of 1 makes sense because loop control can be eliminated. + if (Unrolling == Partial && !Limits.AllowPartial && !countSetExplicitly) { + DEBUG(dbgs() << " Partial unrolling not allowed.\n"); + Desc.emitMissedRemark( + "loop too large to unroll fully and -unroll-allow-partial not given"); return false; } - // Unroll the loop. 
- if (!UnrollLoop(L, Count, TripCount, AllowRuntime, UP.AllowExpensiveTripCount, - TripMultiple, LI, this, &LPM, &AC)) - return false; + bool LoopUnrolled = UnrollLoop(L, Count, Desc.TripCount, Limits.AllowRuntime, + UP.AllowExpensiveTripCount, Desc.TripMultiple, + LI, this, &LPM, AC); + if (LoopUnrolled) { + // emitOptimizationRemark is called by UnrollLoop so no need to call it + // here. + + if (Unrolling != Full && + (Desc.PragmaUnrollCount > 0 || Desc.HasUnrollEnablePragma)) + // If loop still exists mark loop as unrolled (add disable unroll + // metadata) for loops with unroll enabling pragmas. For the unroll count + // case we don't want to unroll more than the pragma indicates. For the + // enable pragma further unrolling is unlikely to be beneficial and will + // likely fail resulting in an emission of a spurious warning. + SetLoopAlreadyUnrolled(L); + + if (InitialCount != Count && countSetExplicitly) + Desc.emitMissedRemark("loop too large or unroll factor too high", Count); + } else { + DEBUG(dbgs() << " Unrolling failed\n"); + // FIXME: It would be better if the reason unrolling failed was provided by + // UnrollLoop. 
+ Desc.emitMissedRemark("loop has a form which cannot be unrolled"); + } - return true; + return LoopUnrolled; } Index: test/Transforms/LoopUnroll/unroll-pragmas.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,5 +1,6 @@ ; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s ; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -pass-remarks=loop-unroll -pass-remarks-missed=loop-unroll -pass-remarks-analysis=loop-unroll -S 2>&1 | FileCheck -check-prefix=REMARK %s ; ; Run loop unrolling twice to verify that loop unrolling metadata is properly ; removed and further unrolling is disabled after the pass is run once. @@ -13,6 +14,9 @@ ; ; CHECK-LABEL: @loop4( ; CHECK-NOT: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 4 +; REMARK: remark: {{.*}}: completely unrolled loop with 4 iterations define void @loop4(i32* nocapture %a) { entry: br label %for.body @@ -37,6 +41,9 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 4, loop has disable unrolling pragma +; REMARK: remark: {{.*}}: loop not unrolled: unrolling is explicitly disabled define void @loop4_with_disable(i32* nocapture %a) { entry: br label %for.body @@ -65,6 +72,9 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 64 +; REMARK: remark: {{.*}}: loop not unrolled: loop too large to unroll fully and -unroll-allow-partial not given define void @loop64(i32* nocapture %a) { entry: br label %for.body @@ -88,6 +98,9 @@ ; ; CHECK-LABEL: @loop64_with_full( ; CHECK-NOT: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for 
unrolling: loop body size = 6, trip count = 64 +; REMARK: remark: {{.*}}: completely unrolled loop with 64 iterations define void @loop64_with_full(i32* nocapture %a) { entry: br label %for.body @@ -118,6 +131,9 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 64, loop has pragma suggesting an unroll factor of 4 +; REMARK: remark: {{.*}}: unrolled loop by a factor of 4 with a breakout at trip 0 define void @loop64_with_count4(i32* nocapture %a) { entry: br label %for.body @@ -145,6 +161,10 @@ ; CHECK-LABEL: @runtime_loop_with_full( ; CHECK: store i32 ; CHECK-NOT: store i32 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count is unknown, loop has full unroll pragma +; REMARK: remark: {{.*}}: loop not unrolled: loop has runtime trip count +; REMARK: warning: {{.*}}: loop has full unroll pragma but was not unrolled: loop has runtime trip count define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 @@ -182,6 +202,9 @@ ; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count is unknown, loop has pragma suggesting an unroll factor of 4 +; REMARK: remark: {{.*}}: unrolled loop by a factor of 4 with run-time trip count define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 @@ -210,6 +233,9 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 4, loop has pragma suggesting an unroll factor of 1 +; REMARK: remark: {{.*}}: loop not unrolled: unrolling with a factor of one does nothing define void @unroll_1(i32* nocapture %a, i32 %b) { entry: br label %for.body @@ -232,12 +258,17 @@ ; #pragma clang loop unroll(full) ; Loop has very high loop count (1 million) and full 
unrolling was requested. -; Loop should unrolled up to the pragma threshold, but not completely. +; The unrolled size exceeds the pragma unroll threshold, so no unrolling should be performed. +; Pragma unroll(full) means the loop is either fully unrolled or not unrolled at all. ; ; CHECK-LABEL: @unroll_1M( ; CHECK: store i32 -; CHECK: store i32 +; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 1000000, loop has full unroll pragma +; REMARK: remark: {{.*}}: loop not unrolled: loop too large and/or trip count too high +; REMARK: warning: {{.*}}: loop has full unroll pragma but was not unrolled: loop too large and/or trip count too high define void @unroll_1M(i32* nocapture %a, i32 %b) { entry: br label %for.body @@ -262,6 +293,9 @@ ; ; CHECK-LABEL: @loop64_with_enable( ; CHECK-NOT: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count = 64, loop has enable unrolling pragma +; REMARK: remark: {{.*}}: completely unrolled loop with 64 iterations define void @loop64_with_enable(i32* nocapture %a) { entry: br label %for.body @@ -302,6 +336,10 @@ ; CHECK: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 +; +; REMARK: remark: {{.*}}: candidate loop for unrolling: loop body size = 6, trip count is unknown, loop has enable unrolling pragma +; REMARK: remark: {{.*}}: unrolled loop by a factor of 8 with run-time trip count +; REMARK-NOT: remark: define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0