Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -83,6 +83,7 @@ #include "llvm/Analysis/CodeMetrics.h" #include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionCost.h" #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopAnalysisManager.h" #include "llvm/Analysis/LoopInfo.h" @@ -1451,7 +1452,7 @@ /// is /// false, then all operations will be scalarized (i.e. no vectorization has /// actually taken place). - using VectorizationCostTy = std::pair<unsigned, bool>; + using VectorizationCostTy = std::pair<InstructionCost, bool>; /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -1465,7 +1466,8 @@ /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. - unsigned getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); + InstructionCost getInstructionCost(Instruction *I, ElementCount VF, + Type *&VectorTy); /// Calculate vectorization cost of memory instruction \p I. unsigned getMemoryInstructionCost(Instruction *I, ElementCount VF); @@ -1509,7 +1511,7 @@ /// A type representing the costs for instructions if they were to be /// scalarized rather than vectorized. The entries are Instruction-Cost /// pairs. - using ScalarCostsTy = DenseMap<Instruction *, unsigned>; + using ScalarCostsTy = DenseMap<Instruction *, InstructionCost>; /// A set containing all BasicBlocks that are known to present after /// vectorization as a predicated block. 
@@ -5408,23 +5410,27 @@ LoopVectorizationCostModel::selectVectorizationFactor(ElementCount MaxVF) { assert(!MaxVF.isScalable() && "scalable vectors not yet supported"); - unsigned ExpectedCost = expectedCost(ElementCount::getFixed(1)).first; + InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first; LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); - std::pair<unsigned, unsigned> MinCost = {ExpectedCost, 1}; - const std::pair<unsigned, unsigned> ScalarCost = MinCost; + std::pair<InstructionCost, unsigned> MinCost = {ExpectedCost, 1}; + const std::pair<InstructionCost, unsigned> ScalarCost = MinCost; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; if (ForceVectorization && MaxVF.isVector()) { // Ignore scalar width, because the user explicitly wants vectorization. // Initialize cost to max so that VF = 2 is, at least, chosen during cost // evaluation. - MinCost.first = std::numeric_limits<unsigned>::max(); + MinCost.first = InstructionCost::getInvalid(); } - auto isLowerVectorCost = [](const std::pair<unsigned, unsigned> LHS, - const std::pair<unsigned, unsigned> RHS) { - return (float(LHS.first) / LHS.second) < (float(RHS.first) / RHS.second); + auto isLowerVectorCost = [](const std::pair<InstructionCost, unsigned> LHS, + const std::pair<InstructionCost, unsigned> RHS) { + // We're trying to compare: + // Cost1 / Cost1Width < Cost2 / Cost2Width + // which is the same as: + // Cost1 * Cost2Width < Cost2 * Cost1Width + return (LHS.first * RHS.second) < (RHS.first * LHS.second); }; for (unsigned i = 2; i <= MaxVF.getFixedValue(); i *= 2) { @@ -5432,7 +5438,7 @@ // we need to divide the cost of the vector loops by the width of // the vector elements. 
VectorizationCostTy C = expectedCost(ElementCount::getFixed(i)); - std::pair<unsigned, unsigned> VectorCost = {C.first, i}; + std::pair<InstructionCost, unsigned> VectorCost = {C.first, i}; LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << (C.first / i) << ".\n"); if (!C.second && !ForceVectorization) { @@ -5457,7 +5463,11 @@ << "LV: Vectorization seems to be not beneficial, " << "but was forced by a user.\n"); LLVM_DEBUG(dbgs() << "LV: Selecting VF: " << MinCost.second << ".\n"); - return {ElementCount::getFixed(MinCost.second), MinCost.first}; + + ElementCount EC = ElementCount::getFixed(MinCost.second); + if (auto FinalCost = MinCost.first.getValue()) + return {EC, unsigned(*FinalCost)}; + return {EC, std::numeric_limits<unsigned>::max()}; } std::pair @@ -5637,8 +5647,10 @@ // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. - if (LoopCost == 0) - LoopCost = expectedCost(VF).first; + if (LoopCost == 0) { + InstructionCost ExpectedCost = expectedCost(VF).first; + LoopCost = *(ExpectedCost.getValue()); + } assert(LoopCost && "Non-zero loop cost expected"); @@ -5960,14 +5972,13 @@ } int LoopVectorizationCostModel::computePredInstDiscount( - Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts, - ElementCount VF) { + Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) { assert(!isUniformAfterVectorization(PredInst, VF) && "Instruction marked uniform-after-vectorization will be predicated"); // Initialize the discount to zero, meaning that the scalar version and the // vector version cost the same. - int Discount = 0; + InstructionCost Discount = 0; // Holds instructions to analyze. The instructions we visit are mapped in // ScalarCosts. Those instructions are the ones that would be scalarized if @@ -6022,14 +6033,14 @@ // Compute the cost of the vector instruction. Note that this cost already // includes the scalarization overhead of the predicated instruction. 
- unsigned VectorCost = getInstructionCost(I, VF).first; + InstructionCost VectorCost = getInstructionCost(I, VF).first; // Compute the cost of the scalarized instruction. This cost is the cost of // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. assert(!VF.isScalable() && "scalable vectors not yet supported."); - unsigned ScalarCost = + InstructionCost ScalarCost = VF.getKnownMinValue() * getInstructionCost(I, ElementCount::getFixed(1)).first; @@ -6072,7 +6083,7 @@ ScalarCosts[I] = ScalarCost; } - return Discount; + return *(Discount.getValue()); } LoopVectorizationCostModel::VectorizationCostTy @@ -6095,7 +6106,7 @@ // Check if we should override the cost. if (ForceTargetInstructionCost.getNumOccurrences() > 0) - C.first = ForceTargetInstructionCost; + C.first = InstructionCost(ForceTargetInstructionCost); BlockCost.first += C.first; BlockCost.second |= C.second; @@ -6346,7 +6357,7 @@ } Type *VectorTy; - unsigned C = getInstructionCost(I, VF, VectorTy); + InstructionCost C = getInstructionCost(I, VF, VectorTy); bool TypeNotScalarized = VF.isVector() && VectorTy->isVectorTy() && @@ -6541,9 +6552,9 @@ } } -unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, - ElementCount VF, - Type *&VectorTy) { +InstructionCost +LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, + Type *&VectorTy) { Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);