diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -130,6 +130,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstructionCost.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -1635,7 +1636,7 @@ /// is /// false, then all operations will be scalarized (i.e. no vectorization has /// actually taken place). - using VectorizationCostTy = std::pair; + using VectorizationCostTy = std::pair; /// Returns the expected execution cost. The unit of the cost does /// not matter because we use the 'cost' units to compare different @@ -1649,7 +1650,8 @@ /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. - unsigned getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); + InstructionCost getInstructionCost(Instruction *I, ElementCount VF, + Type *&VectorTy); /// Calculate vectorization cost of memory instruction \p I. unsigned getMemoryInstructionCost(Instruction *I, ElementCount VF); @@ -1693,7 +1695,7 @@ /// A type representing the costs for instructions if they were to be /// scalarized rather than vectorized. The entries are Instruction-Cost /// pairs. - using ScalarCostsTy = DenseMap; + using ScalarCostsTy = DenseMap; /// A set containing all BasicBlocks that are known to present after /// vectorization as a predicated block. @@ -5759,10 +5761,13 @@ // vectors when the loop has a hint to enable vectorization for a given VF. assert(!MaxVF.isScalable() && "scalable vectors not yet supported"); - float Cost = expectedCost(ElementCount::getFixed(1)).first; - const float ScalarCost = Cost; + InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first; + LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n"); + assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop"); + unsigned Width = 1; - LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << (int)ScalarCost << ".\n"); + const float ScalarCost = *ExpectedCost.getValue(); + float Cost = ScalarCost; bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled; if (ForceVectorization && MaxVF.isVector()) { @@ -5777,7 +5782,8 @@ // we need to divide the cost of the vector loops by the width of // the vector elements. VectorizationCostTy C = expectedCost(ElementCount::getFixed(i)); - float VectorCost = C.first / (float)i; + assert(C.first.isValid() && "Unexpected invalid cost for vector loop"); + float VectorCost = *C.first.getValue() / (float)i; LLVM_DEBUG(dbgs() << "LV: Vector loop of width " << i << " costs: " << (int)VectorCost << ".\n"); if (!C.second && !ForceVectorization) { @@ -6119,8 +6125,10 @@ // If we did not calculate the cost for VF (because the user selected the VF) // then we calculate the cost of VF here. - if (LoopCost == 0) - LoopCost = expectedCost(VF).first; + if (LoopCost == 0) { + assert(expectedCost(VF).first.isValid() && "Expected a valid cost"); + LoopCost = *expectedCost(VF).first.getValue(); + } assert(LoopCost && "Non-zero loop cost expected"); @@ -6442,14 +6450,13 @@ } int LoopVectorizationCostModel::computePredInstDiscount( - Instruction *PredInst, DenseMap &ScalarCosts, - ElementCount VF) { + Instruction *PredInst, ScalarCostsTy &ScalarCosts, ElementCount VF) { assert(!isUniformAfterVectorization(PredInst, VF) && "Instruction marked uniform-after-vectorization will be predicated"); // Initialize the discount to zero, meaning that the scalar version and the // vector version cost the same. - int Discount = 0; + InstructionCost Discount = 0; // Holds instructions to analyze. The instructions we visit are mapped in // ScalarCosts. Those instructions are the ones that would be scalarized if @@ -6504,14 +6511,14 @@ // Compute the cost of the vector instruction. Note that this cost already // includes the scalarization overhead of the predicated instruction. - unsigned VectorCost = getInstructionCost(I, VF).first; + InstructionCost VectorCost = getInstructionCost(I, VF).first; // Compute the cost of the scalarized instruction. This cost is the cost of // the instruction as if it wasn't if-converted and instead remained in the // predicated block. We will scale this cost by block probability after // computing the scalarization overhead. assert(!VF.isScalable() && "scalable vectors not yet supported."); - unsigned ScalarCost = + InstructionCost ScalarCost = VF.getKnownMinValue() * getInstructionCost(I, ElementCount::getFixed(1)).first; @@ -6554,7 +6561,7 @@ ScalarCosts[I] = ScalarCost; } - return Discount; + return *Discount.getValue(); } LoopVectorizationCostModel::VectorizationCostTy @@ -6576,7 +6583,7 @@ // Check if we should override the cost. if (ForceTargetInstructionCost.getNumOccurrences() > 0) - C.first = ForceTargetInstructionCost; + C.first = InstructionCost(ForceTargetInstructionCost); BlockCost.first += C.first; BlockCost.second |= C.second; @@ -6828,7 +6835,7 @@ } Type *VectorTy; - unsigned C = getInstructionCost(I, VF, VectorTy); + InstructionCost C = getInstructionCost(I, VF, VectorTy); bool TypeNotScalarized = VF.isVector() && VectorTy->isVectorTy() && @@ -7022,9 +7029,9 @@ } } -unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, - ElementCount VF, - Type *&VectorTy) { +InstructionCost +LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, + Type *&VectorTy) { Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); @@ -7299,12 +7306,8 @@ return std::min(CallCost, getVectorIntrinsicCost(CI, VF)); return CallCost; } - case Instruction::ExtractValue: { - InstructionCost ExtractCost = - TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); - assert(ExtractCost.isValid() && "Invalid cost for ExtractValue"); - return *(ExtractCost.getValue()); - } + case Instruction::ExtractValue: + return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'.