diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -674,8 +674,9 @@
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract) const;
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    const APInt &DemandedElts, bool Insert,
+                                    bool Extract) const;
 
   /// Estimate the overhead of scalarizing an instructions unique
   /// non-constant operands. The types of the arguments are ordinarily
@@ -950,8 +951,8 @@
   /// extraction shuffle kinds to show the insert/extract point and the type of
   /// the subvector being inserted/extracted.
   /// NOTE: For subvector extractions Tp represents the source type.
-  int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
-                     VectorType *SubTp = nullptr) const;
+  int getShuffleCost(ShuffleKind Kind, FixedVectorType *Tp, int Index = 0,
+                     FixedVectorType *SubTp = nullptr) const;
 
   /// \return The expected cost of cast instructions, such as bitcast, trunc,
   /// zext, etc. If there is an existing instruction that holds Opcode, it
@@ -962,7 +963,8 @@
 
   /// \return The expected cost of a sign- or zero-extended vector extract. Use
   /// -1 to indicate that there is no information about the index value.
-  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+  int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+                               FixedVectorType *VecTy,
                                unsigned Index = -1) const;
 
   /// \return The expected cost of control-flow related instructions such as
@@ -1036,12 +1038,13 @@
   ///      (v0, v1, v2, v3)
   ///      ((v0+v2), (v1+v3), undef, undef)
   int getArithmeticReductionCost(
-      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
+      unsigned Opcode, FixedVectorType *Ty, bool IsPairwiseForm,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   int getMinMaxReductionCost(
-      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+      FixedVectorType *Ty, FixedVectorType *CondTy, bool IsPairwiseForm,
+      bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
@@ -1295,7 +1298,7 @@
   virtual bool shouldBuildLookupTables() = 0;
   virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
   virtual bool useColdCCForColdCall(Function &F) = 0;
-  virtual unsigned getScalarizationOverhead(VectorType *Ty,
+  virtual unsigned getScalarizationOverhead(FixedVectorType *Ty,
                                             const APInt &DemandedElts,
                                             bool Insert, bool Extract) = 0;
   virtual unsigned
@@ -1372,13 +1375,14 @@
                                          OperandValueKind Opd2Info,
                                          OperandValueProperties Opd1PropInfo,
                                          OperandValueProperties Opd2PropInfo,
                                          ArrayRef<const Value *> Args,
                                          const Instruction *CxtI = nullptr) = 0;
-  virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
-                             VectorType *SubTp) = 0;
+  virtual int getShuffleCost(ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                             FixedVectorType *SubTp) = 0;
   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                TTI::TargetCostKind CostKind,
                                const Instruction *I) = 0;
   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
-                                       VectorType *VecTy, unsigned Index) = 0;
+                                       FixedVectorType *VecTy,
+                                       unsigned Index) = 0;
   virtual int getCFInstrCost(unsigned Opcode,
                              TTI::TargetCostKind CostKind) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
@@ -1406,10 +1410,11 @@
                                            TTI::TargetCostKind CostKind,
                                            bool UseMaskForCond = false,
                                            bool UseMaskForGaps = false) = 0;
-  virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+  virtual int getArithmeticReductionCost(unsigned Opcode, FixedVectorType *Ty,
                                          bool IsPairwiseForm,
                                          TTI::TargetCostKind CostKind) = 0;
-  virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+  virtual int getMinMaxReductionCost(FixedVectorType *Ty,
+                                     FixedVectorType *CondTy,
                                      bool IsPairwiseForm, bool IsUnsigned,
                                      TTI::TargetCostKind CostKind) = 0;
   virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
@@ -1628,8 +1633,9 @@
     return Impl.useColdCCForColdCall(F);
   }
 
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract) override {
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    const APInt &DemandedElts, bool Insert,
+                                    bool Extract) override {
     return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
   }
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
@@ -1774,8 +1780,8 @@
     return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
   }
-  int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
-                     VectorType *SubTp) override {
+  int getShuffleCost(ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                     FixedVectorType *SubTp) override {
     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
   }
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -1783,7 +1789,8 @@
                        const Instruction *I) override {
     return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
   }
-  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
+  int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
+                               FixedVectorType *VecTy,
                                unsigned Index) override {
     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
   }
@@ -1827,13 +1834,13 @@
                                              Alignment, AddressSpace, CostKind,
                                              UseMaskForCond, UseMaskForGaps);
   }
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+  int getArithmeticReductionCost(unsigned Opcode, FixedVectorType *Ty,
                                  bool IsPairwiseForm,
                                  TTI::TargetCostKind CostKind) override {
     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
                                            CostKind);
   }
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+  int getMinMaxReductionCost(FixedVectorType *Ty, FixedVectorType *CondTy,
                              bool IsPairwiseForm, bool IsUnsigned,
                              TTI::TargetCostKind CostKind) override {
     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -80,7 +80,7 @@
 
   /// Estimate a cost of Broadcast as an extract and sequence of insert
   /// operations.
-  unsigned getBroadcastShuffleOverhead(VectorType *VTy) {
+  unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
     unsigned Cost = 0;
     // Broadcast cost is equal to the cost of extracting the zero'th element
     // plus the cost of inserting it into every element of the result vector.
@@ -96,7 +96,7 @@
 
   /// Estimate a cost of shuffle as a sequence of extract and insert
   /// operations.
-  unsigned getPermuteShuffleOverhead(VectorType *VTy) {
+  unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) {
     unsigned Cost = 0;
     // Shuffle cost is equal to the cost of extracting element from its argument
     // plus the cost of inserting them onto the result vector.
@@ -116,8 +116,8 @@
 
   /// Estimate a cost of subvector extraction as a sequence of extract and
   /// insert operations.
-  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
-                                       VectorType *SubVTy) {
+  unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
+                                       FixedVectorType *SubVTy) {
     assert(VTy && SubVTy &&
            "Can only extract subvectors from vectors");
     int NumSubElts = SubVTy->getNumElements();
@@ -139,8 +139,8 @@
 
   /// Estimate a cost of subvector insertion as a sequence of extract and
   /// insert operations.
-  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
-                                      VectorType *SubVTy) {
+  unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
+                                      FixedVectorType *SubVTy) {
     assert(VTy && SubVTy &&
            "Can only insert subvectors into vectors");
     int NumSubElts = SubVTy->getNumElements();
@@ -525,8 +525,9 @@
   /// Estimate the overhead of scalarizing an instruction. Insert and Extract
   /// are set if the demanded result elements need to be inserted and/or
   /// extracted from vectors.
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract) {
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    const APInt &DemandedElts, bool Insert,
+                                    bool Extract) {
     assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
            "Vector size mismatch");
@@ -547,7 +548,8 @@
   }
 
   /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
-  unsigned getScalarizationOverhead(VectorType *Ty, bool Insert, bool Extract) {
+  unsigned getScalarizationOverhead(FixedVectorType *Ty, bool Insert,
+                                    bool Extract) {
     APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
     return static_cast<T *>(this)->getScalarizationOverhead(Ty, DemandedElts,
                                                             Insert, Extract);
@@ -562,14 +564,16 @@
     SmallPtrSet<const Value *, 4> UniqueOperands;
     for (const Value *A : Args) {
       if (!isa<UndefValue>(A) && UniqueOperands.insert(A).second) {
-        auto *VecTy = dyn_cast<VectorType>(A->getType());
+        assert(!isa<ScalableVectorType>(A->getType()) &&
+               "Only fixed width vectors supported");
+        auto *VecTy = dyn_cast<FixedVectorType>(A->getType());
         if (VecTy) {
           // If A is a vector operand, VF should be 1 or correspond to A.
          assert((VF == 1 || VF == VecTy->getNumElements()) &&
                 "Vector argument does not match VF");
        } else
-          VecTy = VectorType::get(A->getType(), VF);
+          VecTy = FixedVectorType::get(A->getType(), VF);
 
        Cost += getScalarizationOverhead(VecTy, false, true);
      }
@@ -578,7 +582,8 @@
     return Cost;
   }
 
-  unsigned getScalarizationOverhead(VectorType *Ty, ArrayRef<const Value *> Args) {
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    ArrayRef<const Value *> Args) {
     unsigned Cost = 0;
 
     Cost += getScalarizationOverhead(Ty, true, false);
@@ -630,7 +635,9 @@
     // Else, assume that we need to scalarize this op.
     // TODO: If one of the types get legalized by splitting, handle this
     // similarly to what getCastInstrCost() does.
-    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+    assert(!isa<ScalableVectorType>(Ty) &&
+           "Only fixed width vectors supported");
+    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
       unsigned Num = VTy->getNumElements();
       unsigned Cost = static_cast<T *>(this)->getArithmeticInstrCost(
           Opcode, VTy->getScalarType(), CostKind);
@@ -643,8 +650,8 @@
     return OpCost;
   }
 
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                          VectorType *SubTp) {
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                          FixedVectorType *SubTp) {
     switch (Kind) {
     case TTI::SK_Broadcast:
       return getBroadcastShuffleOverhead(Tp);
@@ -727,8 +734,10 @@
       break;
     }
 
-    auto *SrcVTy = dyn_cast<VectorType>(Src);
-    auto *DstVTy = dyn_cast<VectorType>(Dst);
+    assert(!isa<ScalableVectorType>(Src) && !isa<ScalableVectorType>(Dst) &&
+           "Only fixed width vectors supported");
+    auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+    auto *DstVTy = dyn_cast<FixedVectorType>(Dst);
 
     // If the cast is marked as legal (or promote) then assume low cost.
     if (SrcLT.first == DstLT.first &&
@@ -779,10 +788,8 @@
           TargetLowering::TypeSplitVector;
       if ((SplitSrc || SplitDst) && SrcVTy->getNumElements() > 1 &&
           DstVTy->getNumElements() > 1) {
-        Type *SplitDstTy = VectorType::get(DstVTy->getElementType(),
-                                           DstVTy->getNumElements() / 2);
-        Type *SplitSrcTy = VectorType::get(SrcVTy->getElementType(),
-                                           SrcVTy->getNumElements() / 2);
+        auto *SplitDstTy = FixedVectorType::getHalfElementsVectorType(DstVTy);
+        auto *SplitSrcTy = FixedVectorType::getHalfElementsVectorType(SrcVTy);
 
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        unsigned SplitCost =
@@ -818,7 +825,7 @@
   }
 
   unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
-                                    VectorType *VecTy, unsigned Index) {
+                                    FixedVectorType *VecTy, unsigned Index) {
     return static_cast<T *>(this)->getVectorInstrCost(
                Instruction::ExtractElement, VecTy, Index) +
            static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
@@ -856,7 +863,9 @@
     // Otherwise, assume that the cast is scalarized.
     // TODO: If one of the types get legalized by splitting, handle this
     // similarly to what getCastInstrCost() does.
-    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
+    assert(!isa<ScalableVectorType>(ValTy) &&
+           "Only fixed width vectors supported");
+    if (auto *ValVTy = dyn_cast<FixedVectorType>(ValTy)) {
       unsigned Num = ValVTy->getNumElements();
       if (CondTy)
         CondTy = CondTy->getScalarType();
@@ -904,7 +913,7 @@
     if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
       // This is a vector load/store for some illegal type that is scalarized.
       // We must account for the cost of building or decomposing the vector.
-      Cost += getScalarizationOverhead(cast<VectorType>(Src),
+      Cost += getScalarizationOverhead(cast<FixedVectorType>(Src),
                                        Opcode != Instruction::Store,
                                        Opcode == Instruction::Store);
     }
 
@@ -920,13 +929,13 @@
                                          TTI::TargetCostKind CostKind,
                                          bool UseMaskForCond = false,
                                          bool UseMaskForGaps = false) {
-    auto *VT = cast<VectorType>(VecTy);
+    auto *VT = cast<FixedVectorType>(VecTy);
 
     unsigned NumElts = VT->getNumElements();
     assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
 
     unsigned NumSubElts = NumElts / Factor;
-    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
+    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);
 
     // Firstly, the cost of load/store operation.
     unsigned Cost;
@@ -1035,8 +1044,8 @@
       return Cost;
 
     Type *I8Type = Type::getInt8Ty(VT->getContext());
-    VectorType *MaskVT = VectorType::get(I8Type, NumElts);
-    SubVT = VectorType::get(I8Type, NumSubElts);
+    auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
+    SubVT = FixedVectorType::get(I8Type, NumSubElts);
 
     // The Mask shuffling cost is extract all the elements of the Mask
     // and insert each of them Factor times into the wide vector:
@@ -1103,8 +1112,9 @@
     Type *RetTy = ICA.getReturnType();
     unsigned VF = ICA.getVectorFactor();
-    unsigned RetVF =
-        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getNumElements() : 1);
+    unsigned RetVF = (isa<FixedVectorType>(RetTy)
+                          ? cast<FixedVectorType>(RetTy)->getNumElements()
+                          : 1);
     assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
     const IntrinsicInst *I = ICA.getInst();
     const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
@@ -1117,11 +1127,11 @@
       for (Value *Op : Args) {
         Type *OpTy = Op->getType();
         assert(VF == 1 || !OpTy->isVectorTy());
-        Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
+        Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
       }
 
       if (VF > 1 && !RetTy->isVoidTy())
-        RetTy = VectorType::get(RetTy, VF);
+        RetTy = FixedVectorType::get(RetTy, VF);
 
       // Compute the scalarization overhead based on Args for a vector
       // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
@@ -1130,8 +1140,8 @@
       if (RetVF > 1 || VF > 1) {
         ScalarizationCost = 0;
         if (!RetTy->isVoidTy())
-          ScalarizationCost +=
-              getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
+          ScalarizationCost += getScalarizationOverhead(
+              cast<FixedVectorType>(RetTy), true, false);
         ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
       }
@@ -1234,7 +1244,12 @@
     unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
     bool SkipScalarizationCost = ICA.skipScalarizationCost();
 
-    auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
+    FixedVectorType *VecOpTy = nullptr;
+    if (!Tys.empty()) {
+      assert(!isa<ScalableVectorType>(Tys[0]) &&
+             "Only fixed width vectors supported");
+      VecOpTy = dyn_cast<FixedVectorType>(Tys[0]);
+    }
 
     SmallVector<unsigned, 2> ISDs;
     unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
@@ -1244,7 +1259,9 @@
       unsigned ScalarizationCost = ScalarizationCostPassed;
       unsigned ScalarCalls = 1;
       Type *ScalarRetTy = RetTy;
-      if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
+      assert(!isa<ScalableVectorType>(RetTy) &&
+             "Only fixed width vectors supported");
+      if (auto *RetVTy = dyn_cast<FixedVectorType>(RetTy)) {
        if (!SkipScalarizationCost)
          ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
        ScalarCalls = std::max(ScalarCalls, RetVTy->getNumElements());
@@ -1253,9 +1270,10 @@
       SmallVector<Type *, 4> ScalarTys;
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         Type *Ty = Tys[i];
-        if (auto *VTy = dyn_cast<VectorType>(Ty)) {
-          if (!SkipScalarizationCost)
-            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
+        assert(!isa<ScalableVectorType>(Ty) &&
+               "Only fixed width vectors supported");
+        if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
+          ScalarizationCost += getScalarizationOverhead(VTy, false, true);
           ScalarCalls = std::max(ScalarCalls, VTy->getNumElements());
           Ty = Ty->getScalarType();
         }
@@ -1394,13 +1412,13 @@
     case Intrinsic::experimental_vector_reduce_fmax:
     case Intrinsic::experimental_vector_reduce_fmin:
       return ConcreteTTI->getMinMaxReductionCost(
-          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+          VecOpTy, cast<FixedVectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
           /*IsUnsigned=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
       return ConcreteTTI->getMinMaxReductionCost(
-          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+          VecOpTy, cast<FixedVectorType>(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
           /*IsUnsigned=*/true, CostKind);
     case Intrinsic::sadd_sat:
@@ -1610,9 +1628,12 @@
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
     // very expensive.
-    if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
-      unsigned ScalarizationCost = SkipScalarizationCost ?
-        ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
+    assert(!isa<ScalableVectorType>(RetTy) &&
+           "Only fixed width vectors supported");
+    if (auto *RetVTy = dyn_cast<FixedVectorType>(RetTy)) {
+      unsigned ScalarizationCost =
+          SkipScalarizationCost ? ScalarizationCostPassed
+                                : getScalarizationOverhead(RetVTy, true, false);
 
       unsigned ScalarCalls = RetVTy->getNumElements();
       SmallVector<Type *, 4> ScalarTys;
@@ -1625,7 +1646,9 @@
       IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
       unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
-        if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
+        assert(!isa<ScalableVectorType>(Tys[i]) &&
+               "Only fixed width vectors supported");
+        if (auto *VTy = dyn_cast<FixedVectorType>(Tys[i])) {
          if (!ICA.skipScalarizationCost())
            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
          ScalarCalls = std::max(ScalarCalls, VTy->getNumElements());
@@ -1699,7 +1722,7 @@
   ///
   /// The cost model should take into account that the actual length of the
   /// vector is reduced on each iteration.
-  unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+  unsigned getArithmeticReductionCost(unsigned Opcode, FixedVectorType *Ty,
                                       bool IsPairwise,
                                       TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
@@ -1715,7 +1738,7 @@
         LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
     while (NumVecElts > MVTLen) {
       NumVecElts /= 2;
-      VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
+      auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
       // Assume the pairwise shuffles add a cost.
      ShuffleCost += (IsPairwise + 1) *
                     ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
@@ -1749,7 +1772,7 @@
 
   /// Try to calculate op costs for min/max reduction operations.
   /// \param CondTy Conditional type for the Select instruction.
-  unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+  unsigned getMinMaxReductionCost(FixedVectorType *Ty, FixedVectorType *CondTy,
                                   bool IsPairwise, bool IsUnsigned,
                                   TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
@@ -1774,8 +1797,8 @@
         LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
     while (NumVecElts > MVTLen) {
       NumVecElts /= 2;
-      VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
-      CondTy = VectorType::get(ScalarCondTy, NumVecElts);
+      auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
+      CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
       // Assume the pairwise shuffles add a cost.
       ShuffleCost += (IsPairwise + 1) *
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -457,7 +457,7 @@
 }
 
 unsigned
-TargetTransformInfo::getScalarizationOverhead(VectorType *Ty,
+TargetTransformInfo::getScalarizationOverhead(FixedVectorType *Ty,
                                               const APInt &DemandedElts,
                                               bool Insert, bool Extract) const {
   return TTIImpl->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
@@ -689,8 +689,9 @@
   return Cost;
 }
 
-int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
-                                        int Index, VectorType *SubTp) const {
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, FixedVectorType *Ty,
+                                        int Index,
+                                        FixedVectorType *SubTp) const {
   int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
@@ -707,7 +708,7 @@
 }
 
 int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
-                                                  VectorType *VecTy,
+                                                  FixedVectorType *VecTy,
                                                   unsigned Index) const {
   int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
@@ -818,10 +819,9 @@
   return Cost;
 }
 
-int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
-                                                    VectorType *Ty,
-                                                    bool IsPairwiseForm,
-                                                    TTI::TargetCostKind CostKind) const {
+int TargetTransformInfo::getArithmeticReductionCost(
+    unsigned Opcode, FixedVectorType *Ty, bool IsPairwiseForm,
+    TTI::TargetCostKind CostKind) const {
   int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
                                                  CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
@@ -829,8 +829,8 @@
 }
 
 int TargetTransformInfo::getMinMaxReductionCost(
-    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
-    TTI::TargetCostKind CostKind) const {
+    FixedVectorType *Ty, FixedVectorType *CondTy, bool IsPairwiseForm,
+    bool IsUnsigned, TTI::TargetCostKind CostKind) const {
   int Cost =
       TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
                                       CostKind);
@@ -1102,7 +1102,7 @@
 
 static ReductionKind
 matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode,
-                       VectorType *&Ty) {
+                       FixedVectorType *&Ty) {
   if (!EnableReduxCost)
     return RK_None;
@@ -1121,7 +1121,7 @@
   if (!RD)
     return RK_None;
 
-  auto *VecTy = cast<VectorType>(RdxStart->getType());
+  auto *VecTy = cast<FixedVectorType>(RdxStart->getType());
   unsigned NumVecElems = VecTy->getNumElements();
   if (!isPowerOf2_32(NumVecElems))
     return RK_None;
@@ -1167,7 +1167,7 @@
 
 static ReductionKind
 matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
-                              unsigned &Opcode, VectorType *&Ty) {
+                              unsigned &Opcode, FixedVectorType *&Ty) {
   if (!EnableReduxCost)
     return RK_None;
@@ -1186,7 +1186,7 @@
   if (!RD)
     return RK_None;
 
-  auto *VecTy = cast<VectorType>(ReduxRoot->getOperand(0)->getType());
+  auto *VecTy = cast<FixedVectorType>(ReduxRoot->getOperand(0)->getType());
   unsigned NumVecElems = VecTy->getNumElements();
   if (!isPowerOf2_32(NumVecElems))
     return RK_None;
@@ -1342,7 +1342,7 @@
     // Try to match a reduction sequence (series of shufflevector and vector
     // adds followed by a extractelement).
     unsigned ReduxOpCode;
-    VectorType *ReduxType;
+    FixedVectorType *ReduxType;
 
     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
@@ -1351,11 +1351,13 @@
                                          CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+          ReduxType,
+          cast<FixedVectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
     case RK_UnsignedMinMax:
       return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+          ReduxType,
+          cast<FixedVectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
     case RK_None:
       break;
@@ -1367,11 +1369,13 @@
                                          /*IsPairwiseForm=*/true, CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+          ReduxType,
+          cast<FixedVectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
     case RK_UnsignedMinMax:
       return getMinMaxReductionCost(
-          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
+          ReduxType,
+          cast<FixedVectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
     case RK_None:
       break;
@@ -1392,8 +1396,8 @@
     return 0; // Model all ExtractValue nodes as free.
   case Instruction::ShuffleVector: {
     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    auto *Ty = cast<VectorType>(Shuffle->getType());
-    auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
+    auto *Ty = cast<FixedVectorType>(Shuffle->getType());
+    auto *SrcTy = cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
 
     // TODO: Identify and add costs for insert subvector, etc.
     int SubIndex;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -230,12 +230,12 @@
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm,
-                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+  int getArithmeticReductionCost(
+      unsigned Opcode, FixedVectorType *Ty, bool IsPairwiseForm,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
 
-  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                     VectorType *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                     FixedVectorType *SubTp);
   /// @}
 };
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -716,12 +716,13 @@
                                                bool UseMaskForCond,
                                                bool UseMaskForGaps) {
   assert(Factor >= 2 && "Invalid interleave factor");
-  auto *VecVTy = cast<VectorType>(VecTy);
+  auto *VecVTy = cast<FixedVectorType>(VecTy);
 
   if (!UseMaskForCond && !UseMaskForGaps &&
       Factor <= TLI->getMaxSupportedInterleaveFactor()) {
     unsigned NumElts = VecVTy->getNumElements();
-    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
+    auto *SubVecTy =
+        FixedVectorType::get(VecVTy->getElementType(), NumElts / Factor);
 
     // ldN/stN only support legal vector types of size 64 or 128 in bits.
     // Accesses having vector types that are a multiple of 128 bits can be
@@ -731,7 +732,7 @@
       return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
   }
 
-  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+  return BaseT::getInterleavedMemoryOpCost(Opcode, VecVTy, Factor, Indices,
                                            Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
 }
@@ -957,7 +958,7 @@
 }
 
 int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
-                                               VectorType *ValTy,
+                                               FixedVectorType *ValTy,
                                                bool IsPairwiseForm,
                                                TTI::TargetCostKind CostKind) {
 
@@ -988,8 +989,8 @@
                                            CostKind);
 }
 
-int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
-                                   int Index, VectorType *SubTp) {
+int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp,
+                                   int Index, FixedVectorType *SubTp) {
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
     static const CostTblEntry ShuffleTbl[] = {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -216,8 +216,8 @@
 
   unsigned getVectorSplitCost() { return 0; }
 
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                          VectorType *SubTp);
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                          FixedVectorType *SubTp);
 
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
@@ -227,16 +227,14 @@
   int getInlinerVectorBonusPercent() { return 0; }
 
   int getArithmeticReductionCost(
-      unsigned Opcode,
-      VectorType *Ty,
-      bool IsPairwise,
+      unsigned Opcode, FixedVectorType *Ty, bool IsPairwise,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
 
   int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                             TTI::TargetCostKind CostKind);
 
   int getMinMaxReductionCost(
-      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
+      FixedVectorType *Ty, FixedVectorType *CondTy, bool IsPairwiseForm,
+      bool IsUnsigned, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
 
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands,
                        TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -608,7 +608,7 @@
   }
 }
 
-int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, FixedVectorType *Ty,
                                            bool IsPairwise,
                                            TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -624,8 +624,9 @@
   return LT.first * getFullRateInstrCost();
 }
 
-int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                       bool IsPairwise, bool IsUnsigned,
+int GCNTTIImpl::getMinMaxReductionCost(FixedVectorType *Ty,
+                                       FixedVectorType *CondTy, bool IsPairwise,
+                                       bool IsUnsigned,
                                        TTI::TargetCostKind CostKind) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -920,10 +921,10 @@
   }
 }
 
-unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
-                                    int Index, VectorType *SubTp) {
+unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *VT,
+                                    int Index, FixedVectorType *SubTp) {
   if (ST->hasVOP3PInsts()) {
-    if (cast<VectorType>(VT)->getNumElements() == 2 &&
+    if (VT->getNumElements() == 2 &&
         DL.getTypeSizeInBits(VT->getElementType()) == 16) {
       // With op_sel VOP3P instructions freely can access the low half or high
       // half of a register, so any swizzle is free.
@@ -1003,8 +1004,8 @@
   }
   case Instruction::ShuffleVector: {
     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    auto *Ty = cast<VectorType>(Shuffle->getType());
-    auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
+    auto *Ty = cast<FixedVectorType>(Shuffle->getType());
+    auto *SrcTy = cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
 
     // TODO: Identify and add costs for insert subvector, etc.
     int SubIndex;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -167,8 +167,8 @@
 
   int getMemcpyCost(const Instruction *I);
 
-  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                     VectorType *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                     FixedVectorType *SubTp);
 
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -653,8 +653,8 @@
   return LibCallCost;
 }
 
-int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
-                               int Index, VectorType *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp,
+                               int Index, FixedVectorType *SubTp) {
   if (ST->hasNEON()) {
     if (Kind == TTI::SK_Broadcast) {
       static const CostTblEntry NEONDupTbl[] = {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -101,8 +101,9 @@
     return true;
   }
 
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract);
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    const APInt &DemandedElts, bool Insert,
+                                    bool Extract);
   unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                             unsigned VF);
   unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -115,7 +115,7 @@
   return (8 * ST.getVectorLength()) / ElemWidth;
 }
 
-unsigned HexagonTTIImpl::getScalarizationOverhead(VectorType *Ty,
+unsigned HexagonTTIImpl::getScalarizationOverhead(FixedVectorType *Ty,
                                                   const APInt &DemandedElts,
                                                   bool Insert, bool Extract) {
   return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -83,8 +83,8 @@
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                     VectorType *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                     FixedVectorType *SubTp);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -530,8 +530,8 @@
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
-                                   int Index, VectorType *SubTp) {
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp,
+                                   int Index, FixedVectorType *SubTp) {
   if (ST->hasVector()) {
     unsigned NumVectors = getNumVectorRegs(Tp);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -126,8 +126,8 @@
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
-                     VectorType *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *Tp, int Index,
+                     FixedVectorType *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                        TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
@@ -135,8 +135,9 @@
                          TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index);
-  unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
-                                    bool Insert, bool Extract);
+  unsigned getScalarizationOverhead(FixedVectorType *Ty,
+                                    const APInt &DemandedElts, bool Insert,
+                                    bool Extract);
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
                       TTI::TargetCostKind CostKind,
@@ -158,13 +159,13 @@
   int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                             TTI::TargetCostKind CostKind);
 
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm,
-                                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
+  int getArithmeticReductionCost(
+      unsigned Opcode, FixedVectorType *Ty, bool IsPairwiseForm,
+      TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency);
 
   int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
 
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+  int getMinMaxReductionCost(FixedVectorType *Ty, FixedVectorType *CondTy,
                              bool IsPairwiseForm, bool IsUnsigned,
                              TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -935,8 +935,8 @@
   return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info);
 }
 
-int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp,
-                               int Index, VectorType *SubTp) {
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, FixedVectorType *BaseTp,
+                               int Index, FixedVectorType *SubTp) {
   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp);
@@ -977,11 +977,10 @@
             BaseTp->getElementType()->getPrimitiveSizeInBits()) {
       assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
              "Unexpected number of elements!");
-      VectorType *VecTy = VectorType::get(BaseTp->getElementType(),
-                                          LT.second.getVectorNumElements());
-      VectorType *SubTy =
-          VectorType::get(BaseTp->getElementType(),
-                          SubLT.second.getVectorNumElements());
+      auto *VecTy = FixedVectorType::get(BaseTp->getElementType(),
+                                         LT.second.getVectorNumElements());
+      auto *SubTy = FixedVectorType::get(BaseTp->getElementType(),
+                                         SubLT.second.getVectorNumElements());
       int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
       int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                        ExtractIndex, SubTy);
@@ -1052,9 +1051,8 @@
       // Number of destination vectors after legalization:
       unsigned NumOfDests = LT.first;
-      VectorType *SingleOpTy =
-          VectorType::get(BaseTp->getElementType(),
-                          LegalVT.getVectorNumElements());
+      auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(),
+                                              LegalVT.getVectorNumElements());
 
       unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
       return NumOfShuffles *
@@ -2882,10 +2880,10 @@
     // TODO: Under what circumstances should we shuffle using the full width?
     int ShuffleCost = 1;
     if (Opcode == Instruction::InsertElement) {
-      auto *SubTy = cast<VectorType>(Val);
+      auto *SubTy = cast<FixedVectorType>(Val);
       EVT VT = TLI->getValueType(DL, Val);
       if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
-        SubTy = VectorType::get(ScalarType, SubNumElts);
+        SubTy = FixedVectorType::get(ScalarType, SubNumElts);
       ShuffleCost = getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, 0, SubTy);
     }
     int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1;
@@ -2900,7 +2898,7 @@
   return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost;
 }
 
-unsigned X86TTIImpl::getScalarizationOverhead(VectorType *Ty,
+unsigned X86TTIImpl::getScalarizationOverhead(FixedVectorType *Ty,
                                               const APInt &DemandedElts,
                                               bool Insert, bool Extract) {
   unsigned Cost = 0;
@@ -2962,7 +2960,7 @@
                                 TTI::TargetCostKind CostKind,
                                 const Instruction *I) {
   // Handle non-power-of-two vectors such as <3 x float>
-  if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
+  if (auto *VTy = dyn_cast<FixedVectorType>(Src)) {
     unsigned NumElem = VTy->getNumElements();
 
     // Handle a few common cases:
@@ -3011,15 +3009,15 @@
   bool IsLoad = (Instruction::Load == Opcode);
   bool IsStore = (Instruction::Store == Opcode);
 
-  VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
+  auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy);
   if (!SrcVTy)
     // To calculate scalar take the regular cost, without mask
     return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace,
                            CostKind);
 
   unsigned NumElem = SrcVTy->getNumElements();
-  VectorType *MaskTy =
-      VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
+  auto *MaskTy =
+      FixedVectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem);
   if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) ||
       (IsStore && !isLegalMaskedStore(SrcVTy, MaybeAlign(Alignment))) ||
       !isPowerOf2_32(NumElem)) {
@@ -3052,8 +3050,8 @@
             getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr);
 
   else if (LT.second.getVectorNumElements() > NumElem) {
-    VectorType *NewMaskTy = VectorType::get(MaskTy->getElementType(),
-                                            LT.second.getVectorNumElements());
+    auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(),
+                                           LT.second.getVectorNumElements());
     // Expanding requires fill mask with zeroes
     Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
   }
@@ -3091,7 +3089,8 @@
   return BaseT::getAddressComputationCost(Ty, SE, Ptr);
 }
 
-int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
+int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode,
+                                           FixedVectorType *ValTy,
                                            bool IsPairwise,
                                            TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
@@ -3158,7 +3157,7 @@
 
   MVT MTy = LT.second;
 
-  auto *ValVTy = cast<VectorType>(ValTy);
+  auto *ValVTy = cast<FixedVectorType>(ValTy);
 
   unsigned ArithmeticCost = 0;
   if (LT.first != 1 && MTy.isVector() &&
@@ -3272,7 +3271,8 @@
   if (LT.first != 1 && MTy.isVector() &&
       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
-    Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
+    Ty = FixedVectorType::get(ValVTy->getElementType(),
+                              MTy.getVectorNumElements());
     ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
     ReductionCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
@@ -3286,26 +3286,30 @@
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts);
+      auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
       ReductionCost +=
           getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
     } else if (Size == 128) {
       // Reducing from 128 bits is a permute of v2f64/v2i64.
-      VectorType *ShufTy;
+      FixedVectorType *ShufTy;
       if (ValVTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2);
+        ShufTy =
+            FixedVectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2);
       else
-        ShufTy = VectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
+        ShufTy =
+            FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2);
       ReductionCost +=
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else if (Size == 64) {
       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
-      VectorType *ShufTy;
+      FixedVectorType *ShufTy;
       if (ValVTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getFloatTy(ValVTy->getContext()), 4);
+        ShufTy =
+            FixedVectorType::get(Type::getFloatTy(ValVTy->getContext()), 4);
       else
-        ShufTy = VectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
+        ShufTy =
+            FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4);
       ReductionCost +=
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else {
@@ -3450,8 +3454,9 @@
          getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind);
 }
 
-int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
-                                       bool IsPairwise, bool IsUnsigned,
+int X86TTIImpl::getMinMaxReductionCost(FixedVectorType *ValTy,
+                                       FixedVectorType *CondTy, bool IsPairwise,
+                                       bool IsUnsigned,
                                        TTI::TargetCostKind CostKind) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
@@ -3534,7 +3539,7 @@
       return Entry->Cost;
   }
 
-  auto *ValVTy = cast<VectorType>(ValTy);
+  auto *ValVTy = cast<FixedVectorType>(ValTy);
   unsigned NumVecElts = ValVTy->getNumElements();
 
   auto *Ty = ValVTy;
@@ -3542,9 +3547,10 @@
   if (LT.first != 1 && MTy.isVector() &&
       MTy.getVectorNumElements() < ValVTy->getNumElements()) {
     // Type needs to be split. We need LT.first - 1 operations ops.
-    Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements());
-    auto *SubCondTy = VectorType::get(
-        cast<VectorType>(CondTy)->getElementType(), MTy.getVectorNumElements());
+    Ty = FixedVectorType::get(ValVTy->getElementType(),
+                              MTy.getVectorNumElements());
+    auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(),
+                                           MTy.getVectorNumElements());
     MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
     MinMaxCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
@@ -3583,31 +3589,32 @@
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts);
+      auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts);
       MinMaxCost +=
          getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
     } else if (Size == 128) {
       // Reducing from 128 bits is a permute of v2f64/v2i64.
-      VectorType *ShufTy;
+      FixedVectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
+        ShufTy =
+            FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
       else
-        ShufTy = VectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
+        ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2);
       MinMaxCost +=
          getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else if (Size == 64) {
       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
-      VectorType *ShufTy;
+      FixedVectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
-        ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
+        ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
       else
-        ShufTy = VectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
+        ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4);
       MinMaxCost +=
          getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else {
       // Reducing from smaller size is a shift by immediate.
-      VectorType *ShiftTy = VectorType::get(
+      auto *ShiftTy = FixedVectorType::get(
           Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
       MinMaxCost += getArithmeticInstrCost(
           Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput,
@@ -3905,8 +3912,8 @@
 
   int MaskUnpackCost = 0;
   if (VariableMask) {
-    VectorType *MaskTy =
-        VectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
+    auto *MaskTy =
+        FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF);
     MaskUnpackCost =
        getScalarizationOverhead(MaskTy, DemandedElts, false, true);
     int ScalarCompareCost =
@@ -4370,8 +4377,8 @@
 
   // Get the cost of one memory operation.
   auto *SingleMemOpTy =
-      VectorType::get(cast<VectorType>(VecTy)->getElementType(),
-                      LegalVT.getVectorNumElements());
+      FixedVectorType::get(cast<FixedVectorType>(VecTy)->getElementType(),
+                           LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace,
                                        CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5721,9 +5721,9 @@
 
     // Compute the scalarization overhead of needed insertelement instructions
    // and phi nodes.
    if (isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
-      ScalarCost += TTI.getScalarizationOverhead(
-          cast<VectorType>(ToVectorTy(I->getType(), VF)),
-          APInt::getAllOnesValue(VF), true, false);
+      ScalarCost +=
+          TTI.getScalarizationOverhead(FixedVectorType::get(I->getType(), VF),
+                                       APInt::getAllOnesValue(VF), true, false);
      ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
    }
@@ -5739,7 +5739,7 @@
          Worklist.push_back(J);
        else if (needsExtract(J, VF))
          ScalarCost += TTI.getScalarizationOverhead(
-              cast<VectorType>(ToVectorTy(J->getType(), VF)),
+              FixedVectorType::get(J->getType(), VF),
              APInt::getAllOnesValue(VF), false, true);
      }
@@ -5880,7 +5880,7 @@
 unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                                                              unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
+  auto *VectorTy = FixedVectorType::get(ValTy, VF);
   Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
@@ -5906,7 +5906,7 @@
 unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                          unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
+  auto *VectorTy = FixedVectorType::get(ValTy, VF);
   const Align Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
@@ -5945,7 +5945,7 @@
 unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
                                                             unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
+  auto *VectorTy = FixedVectorType::get(ValTy, VF);
   unsigned AS = getLoadStoreAddressSpace(I);
 
   auto Group = getInterleavedAccessGroup(I);
@@ -6034,7 +6034,7 @@
   if (!RetTy->isVoidTy() &&
       (!isa<LoadInst>(I) || !TTI.supportsEfficientVectorElementLoadStore()))
     Cost += TTI.getScalarizationOverhead(
-        cast<VectorType>(RetTy), APInt::getAllOnesValue(VF), true, false);
+        cast<FixedVectorType>(RetTy), APInt::getAllOnesValue(VF), true, false);
 
   // Some targets keep addresses scalar.
   if (isa<LoadInst>(I) && !TTI.prefersVectorizedAddressing())
@@ -6239,8 +6239,8 @@
 
    if (ScalarPredicatedBB) {
      // Return cost for branches around scalarized and predicated blocks.
-      VectorType *Vec_i1Ty =
-          VectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
+      auto *Vec_i1Ty =
+          FixedVectorType::get(IntegerType::getInt1Ty(RetTy->getContext()), VF);
      return (TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnesValue(VF),
                                           false, true) +
              (TTI.getCFInstrCost(Instruction::Br) * VF));
@@ -6261,8 +6261,8 @@
    // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
    if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
      return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
-                                cast<VectorType>(VectorTy), VF - 1,
-                                VectorType::get(RetTy, 1));
+                                cast<FixedVectorType>(VectorTy), VF - 1,
+                                FixedVectorType::get(RetTy, 1));
 
    // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
    // converted into select instructions. We require N - 1 selects per phi
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1409,7 +1409,7 @@
 
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(VectorType *Ty,
+  int getGatherCost(FixedVectorType *Ty,
                     const DenseSet<unsigned> &ShuffledIndices) const;
 
   /// \returns the scalarization cost for this list of values. Assuming that
@@ -3282,13 +3282,13 @@
     ScalarTy = SI->getValueOperand()->getType();
   else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
     ScalarTy = CI->getOperand(0)->getType();
-  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   // If we have computed a smaller type for the expression, update VecTy so
   // that the costs will be accurate.
   if (MinBWs.count(VL[0]))
-    VecTy = VectorType::get(
+    VecTy = FixedVectorType::get(
         IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
 
   unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
@@ -3871,13 +3871,13 @@
     // If we plan to rewrite the tree in a smaller type, we will need to sign
     // extend the extracted value back to the original type. Here, we account
     // for the extract and the added cost of the sign extend if needed.
-    auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
+    auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
     auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
     if (MinBWs.count(ScalarRoot)) {
       auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
       auto Extend =
           MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
-      VecTy = VectorType::get(MinTy, BundleWidth);
+      VecTy = FixedVectorType::get(MinTy, BundleWidth);
       ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
                                                    VecTy, EU.Lane);
     } else {
@@ -3904,7 +3904,7 @@
   return Cost;
 }
 
-int BoUpSLP::getGatherCost(VectorType *Ty,
+int BoUpSLP::getGatherCost(FixedVectorType *Ty,
                            const DenseSet<unsigned> &ShuffledIndices) const {
   unsigned NumElts = Ty->getNumElements();
   APInt DemandedElts = APInt::getNullValue(NumElts);
@@ -3923,7 +3923,7 @@
   Type *ScalarTy = VL[0]->getType();
   if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
     ScalarTy = SI->getValueOperand()->getType();
-  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
+  auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
   // Find the cost of inserting/extracting values from the vector.
   // Check if the same elements are inserted several times and count them as
   // shuffle candidates.
@@ -6929,7 +6929,7 @@
   int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
                        unsigned ReduxWidth) {
     Type *ScalarTy = FirstReducedVal->getType();
-    VectorType *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+    auto *VecTy = FixedVectorType::get(ScalarTy, ReduxWidth);
 
     int PairwiseRdxCost;
     int SplittingRdxCost;
@@ -6946,7 +6946,8 @@
     case RK_Max:
     case RK_UMin:
     case RK_UMax: {
-      auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
+      auto *VecCondTy =
+          cast<FixedVectorType>(CmpInst::makeCmpResultType(VecTy));
       bool IsUnsigned = ReductionData.getKind() == RK_UMin ||
                         ReductionData.getKind() == RK_UMax;
       PairwiseRdxCost =
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -59,7 +59,7 @@
          isa<ConstantInt>(Ext1->getOperand(1)) &&
          "Expected constant extract indexes");
   Type *ScalarTy = Ext0->getType();
-  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
+  auto *VecTy = cast<FixedVectorType>(Ext0->getOperand(0)->getType());
   int ScalarOpCost, VectorOpCost;
 
   // Get cost estimates for scalar and vector versions of the operation.
@@ -272,8 +272,8 @@
 
   // Disallow non-vector casts and length-changing shuffles.
   // TODO: We could allow any shuffle.
-  auto *DestTy = dyn_cast<VectorType>(I.getType());
-  auto *SrcTy = cast<VectorType>(V->getType());
+  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
+  auto *SrcTy = cast<FixedVectorType>(V->getType());
   if (!DestTy || I.getOperand(0)->getType() != SrcTy)
     return false;
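
---

Editor's note, not part of the patch: the mechanical pattern this change applies at every former `VectorType` call site is an up-front rejection of scalable vectors followed by a `dyn_cast` to `FixedVectorType`, whose element count is an exact compile-time constant, unlike `ScalableVectorType`, where only a minimum count is known. Below is a minimal standalone sketch of that guard; the helper name `elementCountForCost` is hypothetical and used only for illustration.

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper mirroring the assert + dyn_cast guard installed by
// this patch at former VectorType call sites.
static unsigned elementCountForCost(Type *Ty) {
  // Scalable vectors (<vscale x N x T>) have no fixed element count, so a
  // per-element scalarization cost cannot be computed for them; the cost
  // hooks in this patch therefore refuse them outright.
  assert(!isa<ScalableVectorType>(Ty) && "Only fixed width vectors supported");
  // Fixed vectors (<N x T>) expose an exact count via getNumElements().
  if (auto *VTy = dyn_cast<FixedVectorType>(Ty))
    return VTy->getNumElements();
  return 1; // Scalar types count as a single element.
}
```

Narrowing the parameter types to `FixedVectorType *` pushes this check to the caller and lets each cost hook use `getNumElements()` unconditionally, rather than asserting or re-casting internally.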