Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -912,8 +912,8 @@
   /// extraction shuffle kinds to show the insert/extract point and the type of
   /// the subvector being inserted/extracted.
   /// NOTE: For subvector extractions Tp represents the source type.
-  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
-                     Type *SubTp = nullptr) const;
+  int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
+                     VectorType *SubTp = nullptr) const;
 
   /// \return The expected cost of cast instructions, such as bitcast, trunc,
   /// zext, etc. If there is an existing instruction that holds Opcode, it
@@ -991,10 +991,10 @@
   /// Split:
   /// (v0, v1, v2, v3)
   /// ((v0+v2), (v1+v3), undef, undef)
-  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                  bool IsPairwiseForm) const;
-  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
-                             bool IsUnsigned) const;
+  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                             bool IsPairwiseForm, bool IsUnsigned) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
@@ -1335,8 +1335,8 @@
                                      OperandValueKind Opd2Info,
                                      OperandValueProperties Opd1PropInfo,
                                      OperandValueProperties Opd2PropInfo,
                                      ArrayRef<const Value *> Args,
                                      const Instruction *CxtI = nullptr) = 0;
-  virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
-                             Type *SubTp) = 0;
+  virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+                             VectorType *SubTp) = 0;
   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                const Instruction *I) = 0;
   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
@@ -1359,9 +1359,9 @@
                                          ArrayRef<unsigned> Indices,
                                          unsigned Alignment, unsigned AddressSpace,
                                          bool UseMaskForCond = false,
                                          bool UseMaskForGaps = false) = 0;
-  virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                          bool IsPairwiseForm) = 0;
-  virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+  virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                      bool IsPairwiseForm, bool IsUnsigned) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                     ArrayRef<Type *> Tys, FastMathFlags FMF,
@@ -1735,8 +1735,8 @@
     return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
   }
-  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
-                     Type *SubTp) override {
+  int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
   }
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
@@ -1779,12 +1779,12 @@
                                            Alignment, AddressSpace,
                                            UseMaskForCond, UseMaskForGaps);
   }
-  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                  bool IsPairwiseForm) override {
     return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
   }
-  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
-                             bool IsUnsigned) override {
+  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                             bool IsPairwiseForm, bool IsUnsigned) override {
     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
   }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -438,8 +438,8 @@
     return 1;
   }
 
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
-                          Type *SubTp) {
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, int Index,
+                          VectorType *SubTp) {
     return 1;
   }
 
@@ -512,9 +512,9 @@
     return 0;
   }
 
-  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
+  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
 
-  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }
+  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
 
   unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
 
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -80,8 +80,7 @@
 
   /// Estimate a cost of Broadcast as an extract and sequence of insert
   /// operations.
-  unsigned getBroadcastShuffleOverhead(Type *Ty) {
-    auto *VTy = cast<VectorType>(Ty);
+  unsigned getBroadcastShuffleOverhead(VectorType *VTy) {
     unsigned Cost = 0;
     // Broadcast cost is equal to the cost of extracting the zero'th element
     // plus the cost of inserting it into every element of the result vector.
@@ -97,8 +96,7 @@
 
   /// Estimate a cost of shuffle as a sequence of extract and insert
   /// operations.
-  unsigned getPermuteShuffleOverhead(Type *Ty) {
-    auto *VTy = cast<VectorType>(Ty);
+  unsigned getPermuteShuffleOverhead(VectorType *VTy) {
     unsigned Cost = 0;
     // Shuffle cost is equal to the cost of extracting element from its argument
     // plus the cost of inserting them onto the result vector.
@@ -118,11 +116,10 @@
 
   /// Estimate a cost of subvector extraction as a sequence of extract and
   /// insert operations.
-  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
-    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
+                                       VectorType *SubVTy) {
+    assert(VTy && SubVTy &&
            "Can only extract subvectors from vectors");
-    auto *VTy = cast<VectorType>(Ty);
-    auto *SubVTy = cast<VectorType>(SubTy);
     int NumSubElts = SubVTy->getNumElements();
     assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
            "SK_ExtractSubvector index out of range");
@@ -142,11 +139,10 @@
 
   /// Estimate a cost of subvector insertion as a sequence of extract and
   /// insert operations.
-  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
-    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
+  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
+                                      VectorType *SubVTy) {
+    assert(VTy && SubVTy &&
            "Can only insert subvectors into vectors");
-    auto *VTy = cast<VectorType>(Ty);
-    auto *SubVTy = cast<VectorType>(SubTy);
     int NumSubElts = SubVTy->getNumElements();
     assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
            "SK_InsertSubvector index out of range");
@@ -683,8 +679,8 @@
     return OpCost;
   }
 
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                          Type *SubTp) {
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                          VectorType *SubTp) {
     switch (Kind) {
     case TTI::SK_Broadcast:
       return getBroadcastShuffleOverhead(Tp);
@@ -1198,6 +1194,7 @@
       unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
       const Instruction *I = nullptr) {
     auto *ConcreteTTI = static_cast<T *>(this);
+    auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
     SmallVector<unsigned, 2> ISDs;
     unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
@@ -1320,28 +1317,28 @@
   case Intrinsic::masked_load:
     return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
   case Intrinsic::experimental_vector_reduce_add:
-    return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
+    return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
                                                    /*IsPairwiseForm=*/false);
   case Intrinsic::experimental_vector_reduce_mul:
-    return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
+    return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
                                                    /*IsPairwiseForm=*/false);
   case Intrinsic::experimental_vector_reduce_and:
-    return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
+    return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
                                                    /*IsPairwiseForm=*/false);
   case Intrinsic::experimental_vector_reduce_or:
-    return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
+    return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
                                                    /*IsPairwiseForm=*/false);
   case Intrinsic::experimental_vector_reduce_xor:
-    return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
+    return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
                                                    /*IsPairwiseForm=*/false);
   case Intrinsic::experimental_vector_reduce_v2_fadd:
     return ConcreteTTI->getArithmeticReductionCost(
-        Instruction::FAdd, Tys[0],
+        Instruction::FAdd, VecOpTy,
        /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
                                   // reductions.
   case Intrinsic::experimental_vector_reduce_v2_fmul:
     return ConcreteTTI->getArithmeticReductionCost(
-        Instruction::FMul, Tys[0],
+        Instruction::FMul, VecOpTy,
        /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
                                   // reductions.
   case Intrinsic::experimental_vector_reduce_smax:
@@ -1349,12 +1346,14 @@
   case Intrinsic::experimental_vector_reduce_fmax:
   case Intrinsic::experimental_vector_reduce_fmin:
     return ConcreteTTI->getMinMaxReductionCost(
-        Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
+        VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+        /*IsPairwiseForm=*/false,
         /*IsUnsigned=*/false);
   case Intrinsic::experimental_vector_reduce_umax:
   case Intrinsic::experimental_vector_reduce_umin:
     return ConcreteTTI->getMinMaxReductionCost(
-        Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
+        VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
+        /*IsPairwiseForm=*/false,
         /*IsUnsigned=*/true);
   case Intrinsic::sadd_sat:
   case Intrinsic::ssub_sat: {
@@ -1639,11 +1638,10 @@
   ///
   /// The cost model should take into account that the actual length of the
   /// vector is reduced on each iteration.
-  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                       bool IsPairwise) {
-    assert(Ty->isVectorTy() && "Expect a vector type");
-    Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
-    unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+    Type *ScalarTy = Ty->getElementType();
+    unsigned NumVecElts = Ty->getNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
     unsigned ArithCost = 0;
     unsigned ShuffleCost = 0;
@@ -1655,7 +1653,7 @@
         LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
     while (NumVecElts > MVTLen) {
       NumVecElts /= 2;
-      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+      VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
       // Assume the pairwise shuffles add a cost.
       ShuffleCost += (IsPairwise + 1) *
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
@@ -1689,12 +1687,11 @@
   /// Try to calculate op costs for min/max reduction operations.
   /// \param CondTy Conditional type for the Select instruction.
-  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
-                                  bool) {
-    assert(Ty->isVectorTy() && "Expect a vector type");
-    Type *ScalarTy = cast<VectorType>(Ty)->getElementType();
-    Type *ScalarCondTy = cast<VectorType>(CondTy)->getElementType();
-    unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+  unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                                  bool IsPairwise, bool) {
+    Type *ScalarTy = Ty->getElementType();
+    Type *ScalarCondTy = CondTy->getElementType();
+    unsigned NumVecElts = Ty->getNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
     unsigned CmpOpcode;
     if (Ty->isFPOrFPVectorTy()) {
@@ -1714,7 +1711,7 @@
         LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
     while (NumVecElts > MVTLen) {
       NumVecElts /= 2;
-      Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
+      VectorType *SubTy = VectorType::get(ScalarTy, NumVecElts);
       CondTy = VectorType::get(ScalarCondTy, NumVecElts);
 
       // Assume the pairwise shuffles add a cost.
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -599,8 +599,8 @@
   return Cost;
 }
 
-int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
-                                        Type *SubTp) const {
+int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, VectorType *Ty,
+                                        int Index, VectorType *SubTp) const {
   int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -732,14 +732,16 @@
   return Cost;
 }
 
-int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
+                                                    VectorType *Ty,
                                                     bool IsPairwiseForm) const {
   int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
-int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
+                                                VectorType *CondTy,
                                                 bool IsPairwiseForm,
                                                 bool IsUnsigned) const {
   int Cost =
@@ -1011,7 +1013,8 @@
 }
 
 static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
-                                            unsigned &Opcode, Type *&Ty) {
+                                            unsigned &Opcode,
+                                            VectorType *&Ty) {
   if (!EnableReduxCost)
     return RK_None;
@@ -1076,7 +1079,7 @@
 
 static ReductionKind
 matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
-                              unsigned &Opcode, Type *&Ty) {
+                              unsigned &Opcode, VectorType *&Ty) {
   if (!EnableReduxCost)
     return RK_None;
@@ -1249,7 +1252,7 @@
     // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by a extractelement).
     unsigned ReduxOpCode;
-    Type *ReduxType;
+    VectorType *ReduxType;
     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
@@ -1257,11 +1260,11 @@
                                        /*IsPairwiseForm=*/false);
     case RK_MinMax:
       return getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
     case RK_UnsignedMinMax:
       return getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
     case RK_None:
       break;
@@ -1273,11 +1276,11 @@
                                        /*IsPairwiseForm=*/true);
     case RK_MinMax:
       return getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
     case RK_UnsignedMinMax:
       return getMinMaxReductionCost(
-          ReduxType, CmpInst::makeCmpResultType(ReduxType),
+          ReduxType, cast<VectorType>(CmpInst::makeCmpResultType(ReduxType)),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
     case RK_None:
       break;
@@ -1298,8 +1301,8 @@
     return 0; // Model all ExtractValue nodes as free.
   case Instruction::ShuffleVector: {
     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    Type *Ty = Shuffle->getType();
-    Type *SrcTy = Shuffle->getOperand(0)->getType();
+    VectorType *Ty = cast<VectorType>(Shuffle->getType());
+    VectorType *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
 
     // TODO: Identify and add costs for insert subvector, etc.
     int SubIndex;
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -224,10 +224,11 @@
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
 
-  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                  bool IsPairwiseForm);
 
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
   /// @}
 };
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -930,7 +930,8 @@
   return false;
 }
 
-int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
+                                               VectorType *ValTy,
                                                bool IsPairwiseForm) {
 
   if (IsPairwiseForm)
@@ -958,8 +959,8 @@
   return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
 }
 
-int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                   Type *SubTp) {
+int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                                   int Index, VectorType *SubTp) {
   if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
       Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
     static const CostTblEntry ShuffleTbl[] = {
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -215,8 +215,8 @@
 
   unsigned getVectorSplitCost() { return 0; }
 
-  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                          Type *SubTp);
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                          VectorType *SubTp);
 
   bool areInlineCompatible(const Function *Caller,
                            const Function *Callee) const;
@@ -226,7 +226,7 @@
   int getInlinerVectorBonusPercent() { return 0; }
 
   int getArithmeticReductionCost(unsigned Opcode,
-                                 Type *Ty,
+                                 VectorType *Ty,
                                  bool IsPairwise);
   template <typename T>
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
@@ -239,7 +239,7 @@
   int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                             ArrayRef<Value *> Args, FastMathFlags FMF,
                             unsigned VF = 1, const Instruction *I = nullptr);
-  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
+  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                              bool IsPairwiseForm, bool IsUnsigned);
   unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands);
Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -620,8 +620,8 @@
   }
 }
 
-int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
-                                           bool IsPairwise) {
+int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+                                           bool IsPairwise) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
 
   // Computes cost on targets that have packed math instructions(which support
@@ -635,7 +635,7 @@
   return LT.first * getFullRateInstrCost();
 }
 
-int GCNTTIImpl::getMinMaxReductionCost(Type *Ty, Type *CondTy,
+int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                        bool IsPairwise, bool IsUnsigned) {
   EVT OrigTy = TLI->getValueType(DL, Ty);
@@ -899,10 +899,9 @@
   }
 }
 
-unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                    Type *SubTp) {
+unsigned GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *VT,
+                                    int Index, VectorType *SubTp) {
   if (ST->hasVOP3PInsts()) {
-    VectorType *VT = cast<VectorType>(Tp);
     if (VT->getNumElements() == 2 &&
         DL.getTypeSizeInBits(VT->getElementType()) == 16) {
       // With op_sel VOP3P instructions freely can access the low half or high
@@ -919,7 +918,7 @@
     }
   }
 
-  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+  return BaseT::getShuffleCost(Kind, VT, Index, SubTp);
 }
 
 bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
@@ -986,8 +985,8 @@
   }
   case Instruction::ShuffleVector: {
     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
-    Type *Ty = Shuffle->getType();
-    Type *SrcTy = Shuffle->getOperand(0)->getType();
+    auto *Ty = cast<VectorType>(Shuffle->getType());
+    auto *SrcTy = cast<VectorType>(Shuffle->getOperand(0)->getType());
 
     // TODO: Identify and add costs for insert subvector, etc.
     int SubIndex;
Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -164,7 +164,8 @@
 
   int getMemcpyCost(const Instruction *I);
 
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
 
   bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                              TTI::ReductionFlags Flags) const;
Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -587,8 +587,8 @@
   return LibCallCost;
 }
 
-int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                               Type *SubTp) {
+int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                               int Index, VectorType *SubTp) {
   if (ST->hasNEON()) {
     if (Kind == TTI::SK_Broadcast) {
       static const CostTblEntry NEONDupTbl[] = {
Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -81,7 +81,8 @@
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
   unsigned getVectorTruncCost(Type *SrcTy, Type *DstTy);
   unsigned getVectorBitmaskConversionCost(Type *SrcTy, Type *DstTy);
   unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst,
Index: llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -526,9 +526,8 @@
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
 }
 
-int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                                   Type *SubTp) {
-  assert (Tp->isVectorTy());
+int SystemZTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                                   int Index, VectorType *SubTp) {
   if (ST->hasVector()) {
     unsigned NumVectors = getNumVectorRegs(Tp);
Index: llvm/lib/Target/X86/X86TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -127,7 +127,8 @@
       TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
       ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
       const Instruction *CxtI = nullptr);
-  int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp);
+  int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
+                     VectorType *SubTp);
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                        const Instruction *I = nullptr);
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
@@ -154,13 +155,13 @@
                             ArrayRef<Value *> Args, FastMathFlags FMF,
                             unsigned VF = 1, const Instruction *I = nullptr);
 
-  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
+  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                  bool IsPairwiseForm);
 
   int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
 
-  int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
-                             bool IsUnsigned);
+  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+                             bool IsPairwiseForm, bool IsUnsigned);
 
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                  unsigned Factor, ArrayRef<unsigned> Indices,
Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -925,8 +925,8 @@
   return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
 }
 
-int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
-                               Type *SubTp) {
+int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
+                               int Index, VectorType *SubTp) {
   // 64-bit packed float vectors (v2f32) are widened to type v4f32.
   // 64-bit packed integer vectors (v2i32) are widened to type v4i32.
   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
@@ -967,10 +967,11 @@
             Tp->getVectorElementType()->getPrimitiveSizeInBits()) {
       assert(NumElts >= NumSubElts && NumElts > OrigSubElts &&
              "Unexpected number of elements!");
-      Type *VecTy = VectorType::get(Tp->getVectorElementType(),
-                                    LT.second.getVectorNumElements());
-      Type *SubTy = VectorType::get(Tp->getVectorElementType(),
-                                    SubLT.second.getVectorNumElements());
+      VectorType *VecTy = VectorType::get(Tp->getVectorElementType(),
+                                          LT.second.getVectorNumElements());
+      VectorType *SubTy =
+          VectorType::get(Tp->getVectorElementType(),
+                          SubLT.second.getVectorNumElements());
       int ExtractIndex = alignDown((Index % NumElts), NumSubElts);
       int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
                                        ExtractIndex, SubTy);
@@ -1041,8 +1042,9 @@
 
     // Number of destination vectors after legalization:
     unsigned NumOfDests = LT.first;
-    Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
-                                       LegalVT.getVectorNumElements());
+    VectorType *SingleOpTy =
+        VectorType::get(Tp->getVectorElementType(),
+                        LegalVT.getVectorNumElements());
 
     unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
     return NumOfShuffles *
@@ -2647,7 +2649,7 @@
 
       // TODO: Under what circumstances should we shuffle using the full width?
       int ShuffleCost = 1;
       if (Opcode == Instruction::InsertElement) {
-        Type *SubTy = Val;
+        auto *SubTy = cast<VectorType>(Val);
         EVT VT = TLI->getValueType(DL, Val);
         if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128)
           SubTy = VectorType::get(ScalarType, SubNumElts);
@@ -2795,7 +2797,7 @@
   return BaseT::getAddressComputationCost(Ty, SE, Ptr);
 }
 
-int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
+int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
                                            bool IsPairwise) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
@@ -2865,8 +2867,9 @@
     if (LT.first != 1 && MTy.isVector() &&
         MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
       // Type needs to be split. We need LT.first - 1 arithmetic ops.
-      Type *SingleOpTy = VectorType::get(ValTy->getVectorElementType(),
-                                         MTy.getVectorNumElements());
+      VectorType *SingleOpTy =
+          VectorType::get(ValTy->getVectorElementType(),
+                          MTy.getVectorNumElements());
       ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy);
       ArithmeticCost *= LT.first - 1;
     }
@@ -2967,7 +2970,7 @@
 
   unsigned ReductionCost = 0;
 
-  Type *Ty = ValTy;
+  VectorType *Ty = ValTy;
   if (LT.first != 1 && MTy.isVector() &&
       MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
     // Type needs to be split. We need LT.first - 1 arithmetic ops.
@@ -2986,13 +2989,14 @@
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
+      VectorType *SubTy =
+          VectorType::get(ValTy->getVectorElementType(), NumVecElts);
       ReductionCost +=
           getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
     } else if (Size == 128) {
       // Reducing from 128 bits is a permute of v2f64/v2i64.
-      Type *ShufTy;
+      VectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
         ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
       else
@@ -3001,7 +3005,7 @@
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else if (Size == 64) {
       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
-      Type *ShufTy;
+      VectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
         ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
       else
@@ -3010,7 +3014,7 @@
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else {
       // Reducing from smaller size is a shift by immediate.
-      Type *ShiftTy = VectorType::get(
+      VectorType *ShiftTy = VectorType::get(
           Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
       ReductionCost += getArithmeticInstrCost(
           Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
@@ -3148,7 +3152,7 @@
       getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr);
 }
 
-int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy,
+int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
                                        bool IsPairwise, bool IsUnsigned) {
   // Just use the default implementation for pair reductions.
   if (IsPairwise)
@@ -3232,15 +3236,15 @@
 
   unsigned NumVecElts = ValTy->getVectorNumElements();
 
-  Type *Ty = ValTy;
+  VectorType *Ty = ValTy;
   unsigned MinMaxCost = 0;
   if (LT.first != 1 && MTy.isVector() &&
       MTy.getVectorNumElements() < ValTy->getVectorNumElements()) {
     // Type needs to be split. We need LT.first - 1 operations ops.
     Ty = VectorType::get(ValTy->getVectorElementType(),
                          MTy.getVectorNumElements());
-    Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
-                                      MTy.getVectorNumElements());
+    VectorType *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
+                                            MTy.getVectorNumElements());
     MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
     MinMaxCost *= LT.first - 1;
     NumVecElts = MTy.getVectorNumElements();
@@ -3278,13 +3282,13 @@
     NumVecElts /= 2;
     // If we're reducing from 256/512 bits, use an extract_subvector.
     if (Size > 128) {
-      Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
+      VectorType *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts);
       MinMaxCost +=
           getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy);
       Ty = SubTy;
     } else if (Size == 128) {
       // Reducing from 128 bits is a permute of v2f64/v2i64.
-      Type *ShufTy;
+      VectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
         ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2);
       else
@@ -3293,7 +3297,7 @@
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else if (Size == 64) {
       // Reducing from 64 bits is a shuffle of v4f32/v4i32.
-      Type *ShufTy;
+      VectorType *ShufTy;
       if (ValTy->isFloatingPointTy())
         ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4);
       else
@@ -3302,7 +3306,7 @@
           getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr);
     } else {
       // Reducing from smaller size is a shift by immediate.
-      Type *ShiftTy = VectorType::get(
+      VectorType *ShiftTy = VectorType::get(
           Type::getIntNTy(ValTy->getContext(), Size), 128 / Size);
       MinMaxCost += getArithmeticInstrCost(
           Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue,
@@ -3311,8 +3315,8 @@
     }
 
     // Add the arithmetic op for this level.
-    Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
-                                      Ty->getVectorNumElements());
+    VectorType *SubCondTy = VectorType::get(CondTy->getVectorElementType(),
+                                            Ty->getVectorNumElements());
     MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
   }
@@ -4031,8 +4035,8 @@
   unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
 
   // Get the cost of one memory operation.
-  Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
-                                        LegalVT.getVectorNumElements());
+  VectorType *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
+                                              LegalVT.getVectorNumElements());
   unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy,
                                        MaybeAlign(Alignment), AddressSpace);
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5845,7 +5845,7 @@
 unsigned LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
                                                              unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  Type *VectorTy = ToVectorTy(ValTy, VF);
+  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   Value *Ptr = getLoadStorePointerOperand(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
@@ -5869,7 +5869,7 @@
 unsigned LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
                                                          unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  Type *VectorTy = ToVectorTy(ValTy, VF);
+  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   unsigned AS = getLoadStoreAddressSpace(I);
   if (isa<LoadInst>(I)) {
@@ -5891,7 +5891,7 @@
 unsigned LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
                                                           unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  Type *VectorTy = ToVectorTy(ValTy, VF);
+  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   const MaybeAlign Alignment = getLoadStoreAlignment(I);
   Value *Ptr = getLoadStorePointerOperand(I);
 
@@ -5904,14 +5904,14 @@
 unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
                                                             unsigned VF) {
   Type *ValTy = getMemInstValueType(I);
-  Type *VectorTy = ToVectorTy(ValTy, VF);
+  auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
   unsigned AS = getLoadStoreAddressSpace(I);
 
   auto Group = getInterleavedAccessGroup(I);
   assert(Group && "Fail to get an interleaved access group.");
 
   unsigned InterleaveFactor = Group->getFactor();
-  Type *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
+  VectorType *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor);
 
   // Holds the indices of existing members in an interleaved load group.
   // An interleaved store group doesn't need this as it doesn't allow gaps.
@@ -6215,7 +6215,8 @@
     // NOTE: Don't use ToVectorTy as SK_ExtractSubvector expects a vector type.
     if (VF > 1 && Legal->isFirstOrderRecurrence(Phi))
       return TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
-                                VectorTy, VF - 1, VectorType::get(RetTy, 1));
+                                cast<VectorType>(VectorTy), VF - 1,
+                                VectorType::get(RetTy, 1));
 
     // Phi nodes in non-header blocks (not inductions, reductions, etc.) are
     // converted into select instructions. We require N - 1 selects per phi
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1400,7 +1400,8 @@
 
   /// \returns the scalarization cost for this type. Scalarization in this
   /// context means the creation of vectors from a group of scalars.
-  int getGatherCost(Type *Ty, const DenseSet<unsigned> &ShuffledIndices) const;
+  int getGatherCost(VectorType *Ty,
+                    const DenseSet<unsigned> &ShuffledIndices) const;
 
   /// \returns the scalarization cost for this list of values. Assuming that
   /// this subtree gets vectorized, we may need to extract the values from the
@@ -3871,10 +3872,10 @@
   return Cost;
 }
 
-int BoUpSLP::getGatherCost(Type *Ty,
+int BoUpSLP::getGatherCost(VectorType *Ty,
                            const DenseSet<unsigned> &ShuffledIndices) const {
   int Cost = 0;
-  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
+  for (unsigned i = 0, e = Ty->getNumElements(); i < e; ++i)
     if (!ShuffledIndices.count(i))
       Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
   if (!ShuffledIndices.empty())
@@ -6892,7 +6893,7 @@
   int getReductionCost(TargetTransformInfo *TTI, Value *FirstReducedVal,
                        unsigned ReduxWidth) {
     Type *ScalarTy = FirstReducedVal->getType();
-    Type *VecTy = VectorType::get(ScalarTy, ReduxWidth);
+    VectorType *VecTy = VectorType::get(ScalarTy, ReduxWidth);
 
     int PairwiseRdxCost;
     int SplittingRdxCost;
@@ -6909,7 +6910,7 @@
     case RK_Max:
     case RK_UMin:
     case RK_UMax: {
-      Type *VecCondTy = CmpInst::makeCmpResultType(VecTy);
+      auto *VecCondTy = cast<VectorType>(CmpInst::makeCmpResultType(VecTy));
       bool IsUnsigned = ReductionData.getKind() == RK_UMin ||
                         ReductionData.getKind() == RK_UMax;
       PairwiseRdxCost =
Index: llvm/lib/Transforms/Vectorize/VectorCombine.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -58,7 +58,7 @@
          isa<ConstantInt>(Ext1->getOperand(1)) &&
          "Expected constant extract indexes");
   Type *ScalarTy = Ext0->getType();
-  Type *VecTy = Ext0->getOperand(0)->getType();
+  VectorType *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
   int ScalarOpCost, VectorOpCost;
 
   // Get cost estimates for scalar and vector versions of the operation.
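
Usage note (not part of the patch): a minimal sketch of how a caller adapts to the updated interface, assuming a cost query against TargetTransformInfo; the helper function and its surrounding setup are hypothetical and only illustrate the new signatures.

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Value.h"
  using namespace llvm;

  // The shuffle/reduction cost hooks now take VectorType *, so the cast from
  // Type * happens once at the call site instead of inside each target's TTI.
  static int broadcastShuffleCost(const TargetTransformInfo &TTI, Value *V) {
    auto *VecTy = cast<VectorType>(V->getType()); // caller guarantees a vector
    return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
  }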