diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1233,10 +1233,11 @@ AssumptionCache *AC, OptimizationRemarkEmitter *ORE, const Function *F, const LoopVectorizeHints *Hints, - InterleavedAccessInfo &IAI) + InterleavedAccessInfo &IAI, + GeneratedRTChecks &RTChecks) : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), - Hints(Hints), InterleaveInfo(IAI) {} + Hints(Hints), InterleaveInfo(IAI), RTChecks(RTChecks) {} /// \return An upper bound for the vectorization factors (both fixed and /// scalable). If the factors are 0, vectorization and interleaving should be @@ -1635,6 +1636,17 @@ Scalars.clear(); } + /// The vectorization cost is a combination of the cost itself and a boolean + /// indicating whether any of the contributing operations will actually + /// operate on vector values after type legalization in the backend. If this + /// latter value is false, then all operations will be scalarized (i.e. no + /// vectorization has actually taken place). + using VectorizationCostTy = std::pair<InstructionCost, bool>; + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + private: unsigned NumPredStores = 0; @@ -1663,13 +1675,6 @@ /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); - /// The vectorization cost is a combination of the cost itself and a boolean - /// indicating whether any of the contributing operations will actually - /// operate on vector values after type legalization in the backend. If this - /// latter value is false, then all operations will be scalarized (i.e. no - /// vectorization has actually taken place). 
- using VectorizationCostTy = std::pair<InstructionCost, bool>; - /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different /// vector widths. The cost that is returned is *not* normalized by @@ -1681,10 +1686,6 @@ expectedCost(ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid = nullptr); - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. InstructionCost getInstructionCost(Instruction *I, ElementCount VF, @@ -1891,6 +1892,10 @@ /// with the same stride and close to each other. InterleavedAccessInfo &InterleaveInfo; + /// Structure to hold information about generated runtime checks, responsible + /// for cleaning the checks, if vectorization turns out unprofitable. + GeneratedRTChecks &RTChecks; + /// Values to ignore in the cost model. SmallPtrSet<const Value *, 16> ValuesToIgnore; @@ -2050,6 +2055,26 @@ return MemCheckBlock; } + InstructionCost getCost(LoopVectorizationCostModel &CM) { + generateChecks(); + InstructionCost RTCheckCost = 0; + if (SCEVCheckBlock) + for (Instruction &I : *SCEVCheckBlock) { + if (SCEVCheckBlock->getTerminator() == &I) + continue; + RTCheckCost += + CM.getInstructionCost(&I, ElementCount::getFixed(1)).first; + } + if (MemCheckBlock) + for (Instruction &I : *MemCheckBlock) { + if (MemCheckBlock->getTerminator() == &I) + continue; + RTCheckCost += + CM.getInstructionCost(&I, ElementCount::getFixed(1)).first; + } + return RTCheckCost; + } + private: /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can /// accurately estimate the cost of the runtime checks. 
The blocks are @@ -6063,7 +6088,12 @@ "Expected Scalar VF to be a candidate"); const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost); - VectorizationFactor ChosenFactor = ScalarCost; + + InstructionCost RTCost = RTChecks.getCost(*this); + assert(RTCost.isValid() && "Unexpected invalid cost for runtime checks"); + + VectorizationFactor ChosenFactor = {ScalarCost.Width, + ScalarCost.Cost - RTCost}; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; if (ForceVectorization && VFCandidates.size() > 1) { @@ -6073,6 +6103,9 @@ ChosenFactor.Cost = InstructionCost::getMax(); } + LLVM_DEBUG(dbgs() << "LV: Adjusted scalar loop costs: " << ChosenFactor.Cost + << ".\n"); + SmallVector<InstructionVFPair> InvalidCosts; for (const auto &i : VFCandidates) { // The cost for scalar VF=1 is already calculated, so ignore it. @@ -6162,6 +6195,9 @@ } while (!Tail.empty()); } + if (ChosenFactor.Width.isScalar()) + ChosenFactor = ScalarCost; + if (!EnableCondStoresVectorization && NumPredStores) { reportVectorizationFailure("There are conditional stores.", "store that is conditionally executed prevents vectorization", @@ -10052,7 +10088,7 @@ GeneratedRTChecks RTChecks(PSE, *LVL, DT, LI, L, F->getParent()->getDataLayout()); LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F, - &Hints, IAI); + &Hints, IAI, RTChecks); // Use the planner for outer loop vectorization. // TODO: CM is not used at this point inside the planner. Turn CM into an // optional argument if we don't need it in the future. @@ -10289,7 +10325,7 @@ F->getParent()->getDataLayout()); // Use the cost model. LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, - F, &Hints, IAI); + F, &Hints, IAI, RTChecks); CM.collectValuesToIgnore(); CM.collectElementTypesForWidening();