Index: include/llvm/Transforms/Utils/LoopWidening.h
===================================================================
--- /dev/null
+++ include/llvm/Transforms/Utils/LoopWidening.h
@@ -0,0 +1,98 @@
+//===- LoopWidening.h - Loop vectorization and interleaving ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utility classes for widening loops - coalescing multiple
+// iterations together. It provides utilities for vectorizing and interleaving
+// loops.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOOP_WIDENING_H
+#define LLVM_TRANSFORMS_UTILS_LOOP_WIDENING_H
+
+// Included (rather than forward declared) so that AliasAnalysis is known to
+// this header regardless of what the including file pulled in before it.
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+
+class AssumptionCache;
+class BlockFrequencyInfo;
+class DominatorTree;
+class Loop;
+class LoopAccessAnalysis;
+class LoopInfo;
+class ScalarEvolution;
+class TargetLibraryInfo;
+class TargetTransformInfo;
+
+/// Performs vectorization on a loop.
+class LoopVectorization {
+public:
+  /// Creates a new LoopVectorization utility.
+  ///
+  /// \param BFI May be null; vectorize() only consults it when non-null.
+  /// \param DisableUnrolling Disable interleaving completely, overriding the
+  ///        cost model and loop hints.
+  /// \param AlwaysVectorize Forces vectorization, like #pragma vectorize enable.
+  LoopVectorization(ScalarEvolution *SE, LoopInfo *LI,
+                    DominatorTree *DT,
+                    const TargetTransformInfo *TTI,
+                    LoopAccessAnalysis *LAA,
+                    AliasAnalysis *AA,
+                    AssumptionCache *AC,
+                    BlockFrequencyInfo *BFI = nullptr,
+                    TargetLibraryInfo *TLI = nullptr,
+                    bool DisableUnrolling = false,
+                    bool AlwaysVectorize = false);
+
+  /// Attempts to vectorize and/or interleave the inner loop \p L.
+  ///
+  /// \param VectorWidth Must currently be -1: the definition asserts that
+  ///        overriding the vectorization factor is not yet implemented.
+  /// \param UnrollFactor Must currently be -1: the definition asserts that
+  ///        overriding the interleave count is not yet implemented.
+  /// \returns true if the loop was transformed, false otherwise.
+  bool vectorize(Loop *L, int VectorWidth = -1, int UnrollFactor = -1);
+
+protected:
+  /// Appends "llvm.loop.unroll.runtime.disable" to the loop ID metadata of
+  /// \p L unless the scan for existing unroll-disable metadata succeeds.
+  void AddRuntimeUnrollDisableMetaData(Loop *L);
+
+  ScalarEvolution *SE;
+  LoopInfo *LI;
+  DominatorTree *DT;
+  const TargetTransformInfo *TTI;
+  LoopAccessAnalysis *LAA;
+  AliasAnalysis *AA;
+  AssumptionCache *AC;
+  BlockFrequencyInfo *BFI;
+  TargetLibraryInfo *TLI;
+  bool DisableUnrolling, AlwaysVectorize;
+};
+
+/// Performs interleaving of a loop without widening it into vectors.
+class LoopInterleaving : public LoopVectorization {
+public:
+  using LoopVectorization::LoopVectorization;
+
+  /// Interleaves \p L by requesting a vector width of 1.
+  ///
+  /// NOTE: vectorize() currently asserts VectorWidth == -1, so this call
+  /// trips that assertion in asserts-enabled builds until overriding the
+  /// vectorization factor is implemented.
+  bool interleave(Loop *L, int UnrollFactor = -1) {
+    return vectorize(L, 1, UnrollFactor);
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOOP_WIDENING_H
Index: lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- lib/Transforms/Vectorize/LoopVectorize.cpp +++ lib/Transforms/Vectorize/LoopVectorize.cpp @@ -100,6 +100,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/LoopWidening.h" #include #include #include @@ -1529,8 +1530,6 @@ bool DisableUnrolling; bool AlwaysVectorize; - BlockFrequency ColdEntryFreq; - bool runOnFunction(Function &F) override { SE = &getAnalysis().getSE(); LI = &getAnalysis().getLoopInfo(); @@ -1543,11 +1542,6 @@ AC = &getAnalysis().getAssumptionCache(F); LAA = &getAnalysis(); - // Compute some weights outside of the loop over the loops. Compute this - // using a BranchProbability to re-use its scaling math. - const BranchProbability ColdProb(1, 5); // 20% - ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; - // Don't attempt if // 1. 
the target claims to have no vector registers, and // 2. interleaving won't help ILP. @@ -1570,288 +1564,314 @@ // Now walk the identified inner loops. bool Changed = false; - while (!Worklist.empty()) - Changed |= processLoop(Worklist.pop_back_val()); + while (!Worklist.empty()) { + LoopVectorization LV(SE, LI, DT, TTI, LAA, AA, AC, BFI, TLI, + DisableUnrolling, AlwaysVectorize); + Changed |= LV.vectorize(Worklist.pop_back_val()); + } // Process each loop nest in the function. return Changed; } - static void AddRuntimeUnrollDisableMetaData(Loop *L) { - SmallVector MDs; - // Reserve first location for self reference to the LoopID metadata node. - MDs.push_back(nullptr); - bool IsUnrollMetadata = false; - MDNode *LoopID = L->getLoopID(); - if (LoopID) { - // First find existing loop unrolling disable metadata. - for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { - MDNode *MD = dyn_cast(LoopID->getOperand(i)); - if (MD) { - const MDString *S = dyn_cast(MD->getOperand(0)); - IsUnrollMetadata = - S && S->getString().startswith("llvm.loop.unroll.disable"); - } - MDs.push_back(LoopID->getOperand(i)); - } - } - - if (!IsUnrollMetadata) { - // Add runtime unroll disable metadata. - LLVMContext &Context = L->getHeader()->getContext(); - SmallVector DisableOperands; - DisableOperands.push_back( - MDString::get(Context, "llvm.loop.unroll.runtime.disable")); - MDNode *DisableNode = MDNode::get(Context, DisableOperands); - MDs.push_back(DisableNode); - MDNode *NewLoopID = MDNode::get(Context, MDs); - // Set operand 0 to refer to the loop id itself. 
- NewLoopID->replaceOperandWith(0, NewLoopID); - L->setLoopID(NewLoopID); - } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); } - bool processLoop(Loop *L) { - assert(L->empty() && "Only process inner loops."); +}; +} // end anonymous namespace + +LoopVectorization::LoopVectorization(ScalarEvolution *SE, LoopInfo *LI, + DominatorTree *DT, + const TargetTransformInfo *TTI, + LoopAccessAnalysis *LAA, + AliasAnalysis *AA, + AssumptionCache *AC, + BlockFrequencyInfo *BFI, + TargetLibraryInfo *TLI, + bool DisableUnrolling, + bool AlwaysVectorize) : + SE(SE), LI(LI), DT(DT), TTI(TTI), LAA(LAA), AA(AA), AC(AC), BFI(BFI), + TLI(TLI), + DisableUnrolling(DisableUnrolling), AlwaysVectorize(AlwaysVectorize) { +} + +bool LoopVectorization::vectorize(Loop *L, + int VectorWidth, int UnrollFactor) { + assert(L->empty() && "Only process inner loops."); + assert(VectorWidth == -1 && "Overriding VF not yet implemented!"); + assert(UnrollFactor == -1 && "Overriding UF not yet implemented!"); + #ifndef NDEBUG - const std::string DebugLocStr = getDebugLocString(L); + const std::string DebugLocStr = getDebugLocString(L); #endif /* NDEBUG */ - DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " - << DebugLocStr << "\n"); - - LoopVectorizeHints Hints(L, DisableUnrolling); - - DEBUG(dbgs() << "LV: Loop hints:" - << " force=" - << (Hints.getForce() == LoopVectorizeHints::FK_Disabled - ? "disabled" - : (Hints.getForce() == LoopVectorizeHints::FK_Enabled - ? 
"enabled" - : "?")) << " width=" << Hints.getWidth() - << " unroll=" << Hints.getInterleave() << "\n"); - - // Function containing loop - Function *F = L->getHeader()->getParent(); - - // Looking at the diagnostic output is the only way to determine if a loop - // was vectorized (other than looking at the IR or machine code), so it - // is important to generate an optimization remark for each loop. Most of - // these messages are generated by emitOptimizationRemarkAnalysis. Remarks - // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are - // less verbose reporting vectorized loops and unvectorized loops that may - // benefit from vectorization, respectively. - - if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { - DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); - return false; - } - - // Check the loop for a trip count threshold: - // do not vectorize loops with a tiny trip count. - const unsigned TC = SE->getSmallConstantTripCount(L); - if (TC > 0u && TC < TinyTripCountVectorThreshold) { - DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " - << "This loop is not worth vectorizing."); - if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) - DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); - else { - DEBUG(dbgs() << "\n"); - emitAnalysisDiag(F, L, Hints, VectorizationReport() - << "vectorization is not beneficial " - "and is not explicitly forced"); - return false; - } - } + DEBUG(dbgs() << "\nLV: Checking a loop in \"" + << L->getHeader()->getParent()->getName() << "\" from " + << DebugLocStr << "\n"); + + LoopVectorizeHints Hints(L, DisableUnrolling); + + DEBUG(dbgs() << "LV: Loop hints:" + << " force=" + << (Hints.getForce() == LoopVectorizeHints::FK_Disabled + ? "disabled" + : (Hints.getForce() == LoopVectorizeHints::FK_Enabled + ? 
"enabled" + : "?")) << " width=" << Hints.getWidth() + << " unroll=" << Hints.getInterleave() << "\n"); + + // Function containing loop + Function *F = L->getHeader()->getParent(); + + // Looking at the diagnostic output is the only way to determine if a loop + // was vectorized (other than looking at the IR or machine code), so it + // is important to generate an optimization remark for each loop. Most of + // these messages are generated by emitOptimizationRemarkAnalysis. Remarks + // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are + // less verbose reporting vectorized loops and unvectorized loops that may + // benefit from vectorization, respectively. + + if (!Hints.allowVectorization(F, L, AlwaysVectorize)) { + DEBUG(dbgs() << "LV: Loop hints prevent vectorization.\n"); + return false; + } - // Check if it is legal to vectorize the loop. - LoopVectorizationRequirements Requirements; - LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, - &Requirements, &Hints); - if (!LVL.canVectorize()) { - DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); - emitMissedWarning(F, L, Hints); + // Check the loop for a trip count threshold: + // do not vectorize loops with a tiny trip count. + const unsigned TC = SE->getSmallConstantTripCount(L); + if (TC > 0u && TC < TinyTripCountVectorThreshold) { + DEBUG(dbgs() << "LV: Found a loop with a very small trip count. " + << "This loop is not worth vectorizing."); + if (Hints.getForce() == LoopVectorizeHints::FK_Enabled) + DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); + else { + DEBUG(dbgs() << "\n"); + emitAnalysisDiag(F, L, Hints, VectorizationReport() + << "vectorization is not beneficial " + "and is not explicitly forced"); return false; } + } - // Collect values we want to ignore in the cost model. This includes - // type-promoting instructions we identified during reduction detection. 
- SmallPtrSet ValuesToIgnore; - CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore); - for (auto &Reduction : *LVL.getReductionVars()) { - RecurrenceDescriptor &RedDes = Reduction.second; - SmallPtrSetImpl &Casts = RedDes.getCastInsts(); - ValuesToIgnore.insert(Casts.begin(), Casts.end()); - } + // Check if it is legal to vectorize the loop. + LoopVectorizationRequirements Requirements; + LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); + if (!LVL.canVectorize()) { + DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); + emitMissedWarning(F, L, Hints); + return false; + } - // Use the cost model. - LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints, - ValuesToIgnore); - - // Check the function attributes to find out if this function should be - // optimized for size. - bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && - F->optForSize(); - - // Compute the weighted frequency of this loop being executed and see if it - // is less than 20% of the function entry baseline frequency. Note that we - // always have a canonical loop here because we think we *can* vectorize. - // FIXME: This is hidden behind a flag due to pervasive problems with - // exactly what block frequency models. - if (LoopVectorizeWithBlockFrequency) { - BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); - if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && - LoopEntryFreq < ColdEntryFreq) - OptForSize = true; - } + // Collect values we want to ignore in the cost model. This includes + // type-promoting instructions we identified during reduction detection. 
+ SmallPtrSet ValuesToIgnore; + CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore); + for (auto &Reduction : *LVL.getReductionVars()) { + RecurrenceDescriptor &RedDes = Reduction.second; + SmallPtrSetImpl &Casts = RedDes.getCastInsts(); + ValuesToIgnore.insert(Casts.begin(), Casts.end()); + } - // Check the function attributes to see if implicit floats are allowed. - // FIXME: This check doesn't seem possibly correct -- what if the loop is - // an integer loop and the vector instructions selected are purely integer - // vector instructions? - if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { - DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" - "attribute is used.\n"); - emitAnalysisDiag( - F, L, Hints, - VectorizationReport() - << "loop not vectorized due to NoImplicitFloat attribute"); - emitMissedWarning(F, L, Hints); - return false; - } + // Use the cost model. + LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, AC, F, &Hints, + ValuesToIgnore); - // Select the optimal vectorization factor. - const LoopVectorizationCostModel::VectorizationFactor VF = - CM.selectVectorizationFactor(OptForSize); + // Check the function attributes to find out if this function should be + // optimized for size. + bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && + F->optForSize(); - // Select the interleave count. - unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); + // Compute the weighted frequency of this loop being executed and see if it + // is less than 20% of the function entry baseline frequency. Note that we + // always have a canonical loop here because we think we *can* vectorize. + // FIXME: This is hidden behind a flag due to pervasive problems with + // exactly what block frequency models. + if (LoopVectorizeWithBlockFrequency && BFI) { + // Compute the cold-entry threshold as 20% of the function entry frequency, + // using a BranchProbability to re-use its scaling math. 
+ const BranchProbability ColdProb(1, 5); // 20% + auto ColdEntryFreq = BlockFrequency(BFI->getEntryFreq()) * ColdProb; + + BlockFrequency LoopEntryFreq = BFI->getBlockFreq(L->getLoopPreheader()); + if (Hints.getForce() != LoopVectorizeHints::FK_Enabled && + LoopEntryFreq < ColdEntryFreq) + OptForSize = true; + } + + // Check the function attributes to see if implicit floats are allowed. + // FIXME: This check doesn't seem possibly correct -- what if the loop is + // an integer loop and the vector instructions selected are purely integer + // vector instructions? + if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { + DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" + "attribute is used.\n"); + emitAnalysisDiag( + F, L, Hints, + VectorizationReport() + << "loop not vectorized due to NoImplicitFloat attribute"); + emitMissedWarning(F, L, Hints); + return false; + } - // Get user interleave count. - unsigned UserIC = Hints.getInterleave(); + // Select the optimal vectorization factor. + const LoopVectorizationCostModel::VectorizationFactor VF = + CM.selectVectorizationFactor(OptForSize); - // Identify the diagnostic messages that should be produced. - std::string VecDiagMsg, IntDiagMsg; - bool VectorizeLoop = true, InterleaveLoop = true; + // Select the interleave count. + unsigned IC = CM.selectInterleaveCount(OptForSize, VF.Width, VF.Cost); - if (Requirements.doesNotMeet(F, L, Hints)) { - DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " - "requirements.\n"); - emitMissedWarning(F, L, Hints); - return false; - } + // Get user interleave count. + unsigned UserIC = Hints.getInterleave(); - if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); - VecDiagMsg = - "the cost-model indicates that vectorization is not beneficial"; - VectorizeLoop = false; - } + // Identify the diagnostic messages that should be produced. 
+ std::string VecDiagMsg, IntDiagMsg; + bool VectorizeLoop = true, InterleaveLoop = true; - if (IC == 1 && UserIC <= 1) { - // Tell the user interleaving is not beneficial. - DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); - IntDiagMsg = - "the cost-model indicates that interleaving is not beneficial"; - InterleaveLoop = false; - if (UserIC == 1) - IntDiagMsg += - " and is explicitly disabled or interleave count is set to 1"; - } else if (IC > 1 && UserIC == 1) { - // Tell the user interleaving is beneficial, but it explicitly disabled. - DEBUG(dbgs() - << "LV: Interleaving is beneficial but is explicitly disabled."); - IntDiagMsg = "the cost-model indicates that interleaving is beneficial " - "but is explicitly disabled or interleave count is set to 1"; - InterleaveLoop = false; - } + if (Requirements.doesNotMeet(F, L, Hints)) { + DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization " + "requirements.\n"); + emitMissedWarning(F, L, Hints); + return false; + } - // Override IC if user provided an interleave count. - IC = UserIC > 0 ? UserIC : IC; - - // Emit diagnostic messages, if any. - const char *VAPassName = Hints.vectorizeAnalysisPassName(); - if (!VectorizeLoop && !InterleaveLoop) { - // Do not vectorize or interleaving the loop. 
- emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, - L->getStartLoc(), VecDiagMsg); - emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, - L->getStartLoc(), IntDiagMsg); - return false; - } else if (!VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, - L->getStartLoc(), VecDiagMsg); - } else if (VectorizeLoop && !InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, - L->getStartLoc(), IntDiagMsg); - } else if (VectorizeLoop && InterleaveLoop) { - DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " - << DebugLocStr << '\n'); - DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); - } + if (VF.Width == 1) { + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); + VecDiagMsg = + "the cost-model indicates that vectorization is not beneficial"; + VectorizeLoop = false; + } + + if (IC == 1 && UserIC <= 1) { + // Tell the user interleaving is not beneficial. + DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n"); + IntDiagMsg = + "the cost-model indicates that interleaving is not beneficial"; + InterleaveLoop = false; + if (UserIC == 1) + IntDiagMsg += + " and is explicitly disabled or interleave count is set to 1"; + } else if (IC > 1 && UserIC == 1) { + // Tell the user interleaving is beneficial, but it is explicitly disabled. + DEBUG(dbgs() + << "LV: Interleaving is beneficial but is explicitly disabled."); + IntDiagMsg = "the cost-model indicates that interleaving is beneficial " + "but is explicitly disabled or interleave count is set to 1"; + InterleaveLoop = false; + } + + // Override IC if user provided an interleave count. + IC = UserIC > 0 ? UserIC : IC; + + // Emit diagnostic messages, if any. 
+ const char *VAPassName = Hints.vectorizeAnalysisPassName(); + if (!VectorizeLoop && !InterleaveLoop) { + // Do not vectorize or interleave the loop. + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + return false; + } else if (!VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F, + L->getStartLoc(), VecDiagMsg); + } else if (VectorizeLoop && !InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F, + L->getStartLoc(), IntDiagMsg); + } else if (VectorizeLoop && InterleaveLoop) { + DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in " + << DebugLocStr << '\n'); + DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n'); + } + + if (!VectorizeLoop) { + assert(IC > 1 && "interleave count should not be 1 or 0"); + // If we decided that it is not beneficial to vectorize the loop then + // interleave it. + InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); + Unroller.vectorize(&LVL); + + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("interleaved loop (interleaved count: ") + + Twine(IC) + ")"); + } else { + // If we decided that it is *legal* to vectorize the loop then do it. + InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); + LB.vectorize(&LVL); + ++LoopsVectorized; - if (!VectorizeLoop) { - assert(IC > 1 && "interleave count should not be 1 or 0"); - // If we decided that it is not legal to vectorize the loop then - // interleave it. 
- InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC); - Unroller.vectorize(&LVL); + // Add metadata to disable runtime unrolling of the scalar loop when there + // are no runtime checks about strides and memory. In that situation the + // scalar loop is rarely used and is not worth unrolling. + if (!LB.IsSafetyChecksAdded()) + AddRuntimeUnrollDisableMetaData(L); - emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), - Twine("interleaved loop (interleaved count: ") + - Twine(IC) + ")"); - } else { - // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC); - LB.vectorize(&LVL); - ++LoopsVectorized; - - // Add metadata to disable runtime unrolling scalar loop when there's no - // runtime check about strides and memory. Because at this situation, - // scalar loop is rarely used not worthy to be unrolled. - if (!LB.IsSafetyChecksAdded()) - AddRuntimeUnrollDisableMetaData(L); - - // Report the vectorization decision. - emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), - Twine("vectorized loop (vectorization width: ") + - Twine(VF.Width) + ", interleaved count: " + - Twine(IC) + ")"); - } + // Report the vectorization decision. + emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), + Twine("vectorized loop (vectorization width: ") + + Twine(VF.Width) + ", interleaved count: " + + Twine(IC) + ")"); + } - // Mark the loop as already vectorized to avoid vectorizing again. - Hints.setAlreadyVectorized(); + // Mark the loop as already vectorized to avoid vectorizing again. + Hints.setAlreadyVectorized(); - DEBUG(verifyFunction(*L->getHeader()->getParent())); - return true; + DEBUG(verifyFunction(*L->getHeader()->getParent())); + return true; +} + +void LoopVectorization::AddRuntimeUnrollDisableMetaData(Loop *L) { + SmallVector MDs; + // Reserve first location for self reference to the LoopID metadata node. 
+ MDs.push_back(nullptr); + bool IsUnrollMetadata = false; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + // Find existing unroll-disable metadata. NOTE(review): the last MDNode wins. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *MD = dyn_cast(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast(MD->getOperand(0)); + IsUnrollMetadata = + S && S->getString().startswith("llvm.loop.unroll.disable"); + } + MDs.push_back(LoopID->getOperand(i)); + } } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequiredID(LoopSimplifyID); - AU.addRequiredID(LCSSAID); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); - AU.addPreserved(); + if (!IsUnrollMetadata) { + // Add runtime unroll disable metadata. + LLVMContext &Context = L->getHeader()->getContext(); + SmallVector DisableOperands; + DisableOperands.push_back( + MDString::get(Context, "llvm.loop.unroll.runtime.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L->setLoopID(NewLoopID); } - -}; - -} // end anonymous namespace +} //===----------------------------------------------------------------------===// // Implementation of LoopVectorizationLegality, InnerLoopVectorizer and