Index: llvm/trunk/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar.h +++ llvm/trunk/include/llvm/Transforms/Scalar.h @@ -183,11 +183,12 @@ // // LoopUnroll - This pass is a simple loop unrolling pass. // -Pass *createLoopUnrollPass(int OptLevel = 2, int Threshold = -1, int Count = -1, +Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false, + int Threshold = -1, int Count = -1, int AllowPartial = -1, int Runtime = -1, int UpperBound = -1, int AllowPeeling = -1); // Create an unrolling pass for full unrolling that uses exact trip count only. -Pass *createSimpleLoopUnrollPass(int OptLevel = 2); +Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false); //===----------------------------------------------------------------------===// // Index: llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h +++ llvm/trunk/include/llvm/Transforms/Scalar/LoopUnrollPass.h @@ -24,8 +24,14 @@ class LoopFullUnrollPass : public PassInfoMixin { const int OptLevel; + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. 
+ const bool OnlyWhenForced; + public: - explicit LoopFullUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {} + explicit LoopFullUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false) + : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {} PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &U); @@ -50,7 +56,13 @@ Optional AllowUpperBound; int OptLevel; - LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {} + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. + bool OnlyWhenForced; + + LoopUnrollOptions(int OptLevel = 2, bool OnlyWhenForced = false) + : OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced) {} /// Enables or disables partial unrolling. When disabled only full unrolling /// is allowed. Index: llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/trunk/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -378,8 +378,8 @@ if (EnableLoopInterchange) MPM.add(createLoopInterchangePass()); // Interchange loops - if (!DisableUnrollLoops) - MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); // This ends the loop pass pipelines. @@ -682,16 +682,17 @@ addExtensionsToPM(EP_Peephole, MPM); addInstructionCombiningPass(MPM); - if (!DisableUnrollLoops) { - if (EnableUnrollAndJam) { - // Unroll and Jam. We do this before unroll but need to be in a separate - // loop pass manager in order for the outer loop to be processed by - // unroll and jam before the inner loop is unrolled. 
- MPM.add(createLoopUnrollAndJamPass(OptLevel)); - } + if (EnableUnrollAndJam && !DisableUnrollLoops) { + // Unroll and Jam. We do this before unroll but need to be in a separate + // loop pass manager in order for the outer loop to be processed by + // unroll and jam before the inner loop is unrolled. + MPM.add(createLoopUnrollAndJamPass(OptLevel)); + } - MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops + if (!DisableUnrollLoops) { // LoopUnroll may generate some redundency to cleanup. addInstructionCombiningPass(MPM); @@ -700,7 +701,7 @@ // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); - } + } MPM.add(createWarnMissedTransformationsPass()); @@ -872,12 +873,11 @@ if (EnableLoopInterchange) PM.add(createLoopInterchangePass()); - if (!DisableUnrollLoops) - PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel, + DisableUnrollLoops)); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); // The vectorizer may have significantly shortened a loop body; unroll again. 
- if (!DisableUnrollLoops) - PM.add(createLoopUnrollPass(OptLevel)); + PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops)); PM.add(createWarnMissedTransformationsPass()); Index: llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -965,13 +965,15 @@ Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, const TargetTransformInfo &TTI, AssumptionCache &AC, OptimizationRemarkEmitter &ORE, bool PreserveLCSSA, int OptLevel, - Optional ProvidedCount, Optional ProvidedThreshold, - Optional ProvidedAllowPartial, Optional ProvidedRuntime, - Optional ProvidedUpperBound, Optional ProvidedAllowPeeling) { + bool OnlyWhenForced, Optional ProvidedCount, + Optional ProvidedThreshold, Optional ProvidedAllowPartial, + Optional ProvidedRuntime, Optional ProvidedUpperBound, + Optional ProvidedAllowPeeling) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - if (hasUnrollTransformation(L) & TM_Disable) + TransformationMode TM = hasUnrollTransformation(L); + if (TM & TM_Disable) return LoopUnrollResult::Unmodified; if (!L->isLoopSimplifyForm()) { LLVM_DEBUG( @@ -979,6 +981,11 @@ return LoopUnrollResult::Unmodified; } + // When automatic unrolling is disabled, do not unroll unless overridden for + // this loop. + if (OnlyWhenForced && !(TM & TM_Enable)) + return LoopUnrollResult::Unmodified; + unsigned NumInlineCandidates; bool NotDuplicatable; bool Convergent; @@ -1119,6 +1126,12 @@ static char ID; // Pass ID, replacement for typeid int OptLevel; + + /// If false, use a cost model to determine whether unrolling of a loop is + /// profitable. If true, only loops that explicitly request unrolling via + /// metadata are considered. All other loops are skipped. 
+ bool OnlyWhenForced; + Optional ProvidedCount; Optional ProvidedThreshold; Optional ProvidedAllowPartial; @@ -1126,15 +1139,16 @@ Optional ProvidedUpperBound; Optional ProvidedAllowPeeling; - LoopUnroll(int OptLevel = 2, Optional Threshold = None, + LoopUnroll(int OptLevel = 2, bool OnlyWhenForced = false, + Optional Threshold = None, Optional Count = None, Optional AllowPartial = None, Optional Runtime = None, Optional UpperBound = None, Optional AllowPeeling = None) - : LoopPass(ID), OptLevel(OptLevel), ProvidedCount(std::move(Count)), - ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial), - ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound), - ProvidedAllowPeeling(AllowPeeling) { + : LoopPass(ID), OptLevel(OptLevel), OnlyWhenForced(OnlyWhenForced), + ProvidedCount(std::move(Count)), ProvidedThreshold(Threshold), + ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime), + ProvidedUpperBound(UpperBound), ProvidedAllowPeeling(AllowPeeling) { initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -1157,8 +1171,8 @@ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, ProvidedCount, - ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, + L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA, OptLevel, OnlyWhenForced, + ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling); if (Result == LoopUnrollResult::FullyUnrolled) @@ -1188,14 +1202,16 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass(int OptLevel, int Threshold, int Count, - int AllowPartial, int Runtime, int UpperBound, +Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced, + int Threshold, int Count, int AllowPartial, + int Runtime, int UpperBound, int AllowPeeling) { // TODO: It would 
make more sense for this function to take the optionals // directly, but that's dangerous since it would silently break out of tree // callers. return new LoopUnroll( - OptLevel, Threshold == -1 ? None : Optional(Threshold), + OptLevel, OnlyWhenForced, + Threshold == -1 ? None : Optional(Threshold), Count == -1 ? None : Optional(Count), AllowPartial == -1 ? None : Optional(AllowPartial), Runtime == -1 ? None : Optional(Runtime), @@ -1203,8 +1219,8 @@ AllowPeeling == -1 ? None : Optional(AllowPeeling)); } -Pass *llvm::createSimpleLoopUnrollPass(int OptLevel) { - return createLoopUnrollPass(OptLevel, -1, -1, 0, 0, 0, 0); +Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced) { + return createLoopUnrollPass(OptLevel, OnlyWhenForced, -1, -1, 0, 0, 0, 0); } PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, @@ -1234,7 +1250,8 @@ bool Changed = tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, *ORE, - /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None, + /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced, + /*Count*/ None, /*Threshold*/ None, /*AllowPartial*/ false, /*Runtime*/ false, /*UpperBound*/ false, /*AllowPeeling*/ false) != LoopUnrollResult::Unmodified; @@ -1371,7 +1388,8 @@ // flavors of unrolling during construction time (by setting UnrollOpts). 
LoopUnrollResult Result = tryToUnrollLoop( &L, DT, &LI, SE, TTI, AC, ORE, - /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None, + /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced, + /*Count*/ None, /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling); Changed |= Result != LoopUnrollResult::Unmodified; Index: llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll +++ llvm/trunk/test/Transforms/LoopUnroll/disable-loop-unrolling_forced.ll @@ -0,0 +1,30 @@ +; RUN: opt -disable-loop-unrolling -O1 -S < %s | FileCheck %s +; +; Check loop unrolling metadata is honored despite automatic unrolling +; being disabled in the pass builder. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; CHECK-LABEL: @forced( +; CHECK: load +; CHECK: load +define void @forced(i32* nocapture %a) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 + +for.end: + ret void +} + +!0 = distinct !{!0, !{!"llvm.loop.unroll.enable"}, + !{!"llvm.loop.unroll.count", i32 2}}