diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -79,6 +79,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/InstructionCost.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -1493,7 +1494,7 @@
   bool areAllUsersVectorized(Instruction *I) const;
 
   /// \returns the cost of the vectorizable entry.
-  int getEntryCost(TreeEntry *E);
+  InstructionCost getEntryCost(TreeEntry *E);
 
   /// This is the recursive part of buildTree.
   void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
@@ -1515,13 +1516,14 @@
 
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(FixedVectorType *Ty,
-                    const DenseSet<unsigned> &ShuffledIndices) const;
+  InstructionCost
+  getGatherCost(FixedVectorType *Ty,
+                const DenseSet<unsigned> &ShuffledIndices) const;
 
   /// \returns the scalarization cost for this list of values. Assuming that
   /// this subtree gets vectorized, we may need to extract the values from the
   /// roots. This method calculates the cost of extracting the values.
-  int getGatherCost(ArrayRef<Value *> VL) const;
+  InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
 
   /// Set the Builder insert point to one after the last instruction in
   /// the bundle
@@ -1755,8 +1757,9 @@
   };
 
 #ifndef NDEBUG
-  void dumpTreeCosts(TreeEntry *E, int ReuseShuffleCost, int VecCost,
-                     int ScalarCost) const {
+  void dumpTreeCosts(TreeEntry *E, InstructionCost ReuseShuffleCost,
+                     InstructionCost VecCost,
+                     InstructionCost ScalarCost) const {
     dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
     dbgs() << "SLP: Costs:\n";
     dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
@@ -3423,7 +3426,7 @@
   return {IntrinsicCost, LibCost};
 }
 
-int BoUpSLP::getEntryCost(TreeEntry *E) {
+InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
   ArrayRef<Value *> VL = E->Scalars;
 
   Type *ScalarTy = VL[0]->getType();
@@ -3442,7 +3445,7 @@
   unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
-  int ReuseShuffleCost = 0;
+  InstructionCost ReuseShuffleCost = 0;
   if (NeedToShuffleReuses) {
     ReuseShuffleCost =
         TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
   }
@@ -3458,7 +3461,8 @@
         allSameType(VL) && allSameBlock(VL)) {
       Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
       if (ShuffleKind.hasValue()) {
-        int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
+        InstructionCost Cost =
+            TTI->getShuffleCost(ShuffleKind.getValue(), VecTy);
         for (auto *V : VL) {
           // If all users of instruction are going to be vectorized and this
           // instruction itself is not going to be vectorized, consider this
@@ -3490,7 +3494,7 @@
 
     case Instruction::ExtractValue:
     case Instruction::ExtractElement: {
-      int DeadCost = 0;
+      InstructionCost DeadCost = 0;
       if (NeedToShuffleReuses) {
         unsigned Idx = 0;
         for (unsigned I : E->ReuseShuffleIndices) {
@@ -3565,7 +3569,7 @@
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
      Type *SrcTy = VL0->getOperand(0)->getType();
-      int ScalarEltCost =
+      InstructionCost ScalarEltCost =
           TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy,
                                 TTI::getCastContextHint(VL0), CostKind, VL0);
       if (NeedToShuffleReuses) {
@@ -3573,10 +3577,10 @@
       }
 
       // Calculate the cost of this instruction.
-      int ScalarCost = VL.size() * ScalarEltCost;
+      InstructionCost ScalarCost = VL.size() * ScalarEltCost;
 
       auto *SrcVecTy = FixedVectorType::get(SrcTy, VL.size());
-      int VecCost = 0;
+      InstructionCost VecCost = 0;
       // Check if the values are candidates to demote.
       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
         VecCost =
@@ -3591,14 +3595,14 @@
     case Instruction::ICmp:
     case Instruction::Select: {
       // Calculate the cost of this instruction.
-      int ScalarEltCost =
+      InstructionCost ScalarEltCost =
           TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
                                   CmpInst::BAD_ICMP_PREDICATE, CostKind, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
-      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
 
       // Check if all entries in VL are either compares or selects with compares
       // as condition that have the same predicates.
@@ -3617,8 +3621,8 @@
           VecPred = CurrentPred;
       }
 
-      int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
-                                            VecPred, CostKind, VL0);
+      InstructionCost VecCost = TTI->getCmpSelInstrCost(
+          E->getOpcode(), VecTy, MaskTy, VecPred, CostKind, VL0);
       // Check if it is possible and profitable to use min/max for selects in
       // VL.
       //
@@ -3626,7 +3630,8 @@
       if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
         IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
                                           {VecTy, VecTy});
-        int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
+        InstructionCost IntrinsicCost =
+            TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
         // If the selects are the only uses of the compares, they will be dead
         // and we can adjust the cost by removing their cost.
         if (IntrinsicAndUse.second)
@@ -3695,16 +3700,16 @@
       }
 
       SmallVector<Value *, 4> Operands(VL0->operand_values());
-      int ScalarEltCost = TTI->getArithmeticInstrCost(
-          E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
-          Operands, VL0);
+      InstructionCost ScalarEltCost =
+          TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind, Op1VK,
+                                      Op2VK, Op1VP, Op2VP, Operands, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost = TTI->getArithmeticInstrCost(
-          E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP,
-          Operands, VL0);
+      InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost VecCost =
+          TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind, Op1VK,
+                                      Op2VK, Op1VP, Op2VP, Operands, VL0);
       LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
@@ -3714,30 +3719,27 @@
       TargetTransformInfo::OperandValueKind Op2VK =
           TargetTransformInfo::OK_UniformConstantValue;
 
-      int ScalarEltCost =
-          TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind,
-                                      Op1VK, Op2VK);
+      InstructionCost ScalarEltCost = TTI->getArithmeticInstrCost(
+          Instruction::Add, ScalarTy, CostKind, Op1VK, Op2VK);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost =
-          TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind,
-                                      Op1VK, Op2VK);
+      InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost VecCost = TTI->getArithmeticInstrCost(
+          Instruction::Add, VecTy, CostKind, Op1VK, Op2VK);
       LLVM_DEBUG(dumpTreeCosts(E, ReuseShuffleCost, VecCost, ScalarCost));
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
     case Instruction::Load: {
       // Cost of wide load - cost of scalar loads.
       Align alignment = cast<LoadInst>(VL0)->getAlign();
-      int ScalarEltCost =
-          TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0,
-                               CostKind, VL0);
+      InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
+          Instruction::Load, ScalarTy, alignment, 0, CostKind, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecLdCost;
+      InstructionCost ScalarLdCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost VecLdCost;
       if (E->State == TreeEntry::Vectorize) {
         VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0,
                                          CostKind, VL0);
@@ -3759,12 +3761,11 @@
       auto *SI =
           cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
       Align Alignment = SI->getAlign();
-      int ScalarEltCost =
-          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
-                               CostKind, VL0);
-      int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
-                                           VecTy, Alignment, 0, CostKind, VL0);
+      InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
+          Instruction::Store, ScalarTy, Alignment, 0, CostKind, VL0);
+      InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost VecStCost = TTI->getMemoryOpCost(
+          Instruction::Store, VecTy, Alignment, 0, CostKind, VL0);
       if (IsReorder)
         VecStCost += TTI->getShuffleCost(
             TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
@@ -3777,14 +3778,16 @@
 
       // Calculate the cost of the scalar and vector calls.
       IntrinsicCostAttributes CostAttrs(ID, *CI, ElementCount::getFixed(1), 1);
-      int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
+      InstructionCost ScalarEltCost =
+          TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
-      int ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
+      InstructionCost ScalarCallCost = VecTy->getNumElements() * ScalarEltCost;
 
       auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
-      int VecCallCost = std::min(VecCallCosts.first, VecCallCosts.second);
+      InstructionCost VecCallCost =
+          std::min(VecCallCosts.first, VecCallCosts.second);
 
       LLVM_DEBUG(dbgs() << "SLP: Call cost " << VecCallCost - ScalarCallCost
                         << " (" << VecCallCost << "-" << ScalarCallCost << ")"
@@ -3799,7 +3802,7 @@
              (Instruction::isCast(E->getOpcode()) &&
               Instruction::isCast(E->getAltOpcode()))) &&
             "Invalid Shuffle Vector Operand");
-      int ScalarCost = 0;
+      InstructionCost ScalarCost = 0;
       if (NeedToShuffleReuses) {
         for (unsigned Idx : E->ReuseShuffleIndices) {
           Instruction *I = cast<Instruction>(VL[Idx]);
@@ -3823,7 +3826,7 @@
       }
       // VecCost is equal to sum of the cost of creating 2 vectors
       // and the cost of creating shuffle.
-      int VecCost = 0;
+      InstructionCost VecCost = 0;
       if (Instruction::isBinaryOp(E->getOpcode())) {
         VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind);
         VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy,
@@ -4120,21 +4123,23 @@
   return Cost;
 }
 
-int BoUpSLP::getGatherCost(FixedVectorType *Ty,
-                           const DenseSet<unsigned> &ShuffledIndices) const {
+InstructionCost
+BoUpSLP::getGatherCost(FixedVectorType *Ty,
+                       const DenseSet<unsigned> &ShuffledIndices) const {
   unsigned NumElts = Ty->getNumElements();
   APInt DemandedElts = APInt::getNullValue(NumElts);
   for (unsigned I = 0; I < NumElts; ++I)
     if (!ShuffledIndices.count(I))
       DemandedElts.setBit(I);
-  int Cost = TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
-                                           /*Extract*/ false);
+  InstructionCost Cost =
+      TTI->getScalarizationOverhead(Ty, DemandedElts, /*Insert*/ true,
+                                    /*Extract*/ false);
  if (!ShuffledIndices.empty())
     Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
   return Cost;
 }
 
-int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
   // Find the type of the operands in VL.
   Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
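
Note for reviewers: the patch leans on `InstructionCost` behaving like `int` for arithmetic and comparisons while also carrying an explicit invalid state. The standalone sketch below is illustrative only and not part of the patch; it assumes an LLVM tree that ships `llvm/Support/InstructionCost.h` (the header added to the includes above) and is built with the usual LLVM include paths.

```cpp
// Illustrative only -- not part of this patch. Shows the InstructionCost
// semantics that getEntryCost() and getGatherCost() now rely on.
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>

using namespace llvm;

int main() {
  // Arithmetic mirrors plain int, so expressions such as
  // "ReuseShuffleCost + VecCost - ScalarCost" compile unchanged.
  InstructionCost VecCost = 4;
  InstructionCost ScalarCost = 6;
  InstructionCost Diff = VecCost - ScalarCost; // valid, value -2

  // Unlike int, a cost can also be invalid ("cannot be costed"), and the
  // invalid state propagates through arithmetic instead of vanishing.
  InstructionCost Bad = InstructionCost::getInvalid();
  InstructionCost Sum = VecCost + Bad; // invalid

  // Check isValid() before treating a cost as a number.
  if (Diff.isValid())
    outs() << "Diff = " << Diff << "\n";                      // Diff = -2
  outs() << (Sum.isValid() ? "valid" : "invalid") << "\n";    // invalid

  // With this header's ordering, a valid cost compares below an invalid
  // one, so std::min (as used for VecCallCosts above) still picks a
  // computable cost when only one of the two candidates can be costed.
  InstructionCost Best = std::min(VecCost, Bad);
  outs() << "Best = " << Best << "\n";                        // Best = 4
  return 0;
}
```

The last step is why `std::min(VecCallCosts.first, VecCallCosts.second)` keeps its intent after the type change: an uncostable lowering never wins the comparison against one that produced a real number.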