diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -105,6 +105,63 @@
   bool canAnalyze(LoopInfo &LI);
 };
 
+class IntrinsicCostAttributes {
+  const IntrinsicInst *II = nullptr;
+  Type *RetTy = nullptr;
+  Intrinsic::ID IID;
+  SmallVector<Type *, 4> ParamTys;
+  SmallVector<Value *, 4> Arguments;
+  FastMathFlags FMF;
+  unsigned VF = 1;
+  // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
+  // arguments and the return value will be computed based on types.
+  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+
+public:
+  IntrinsicCostAttributes(const IntrinsicInst &I);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+                          unsigned Factor);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+                          unsigned Factor, unsigned ScalarCost);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                          ArrayRef<Type *> Tys, FastMathFlags Flags);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                          ArrayRef<Type *> Tys, FastMathFlags Flags,
+                          unsigned ScalarCost);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                          ArrayRef<Type *> Tys, FastMathFlags Flags,
+                          unsigned ScalarCost,
+                          const IntrinsicInst *I);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                          ArrayRef<Type *> Tys);
+
+  IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
+                          ArrayRef<Value *> Args);
+
+  Intrinsic::ID getID() const { return IID; }
+  const IntrinsicInst *getInst() const { return II; }
+  Type *getReturnType() const { return RetTy; }
+  unsigned getVectorFactor() const { return VF; }
+  FastMathFlags getFlags() const { return FMF; }
+  unsigned getScalarizationCost() const { return ScalarizationCost; }
+  const SmallVectorImpl<Value *> &getArgs() const { return Arguments; }
+  const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
+
+  bool isTypeBasedOnly() const {
+    return Arguments.empty();
+  }
+
+  bool skipScalarizationCost() const {
+    return ScalarizationCost != std::numeric_limits<unsigned>::max();
+  }
+};
+
 class TargetTransformInfo;
 typedef TargetTransformInfo TTI;
 
@@ -994,25 +1051,9 @@
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
-  /// 3. scalar instruction which is to be vectorized with VF.
-  /// I is the optional original context instruction holding the call to the
-  /// intrinsic
-  int getIntrinsicInstrCost(
-      Intrinsic::ID ID, Type *RetTy, ArrayRef<Value *> Args,
-      FastMathFlags FMF, unsigned VF = 1,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
-
-  /// \returns The cost of Intrinsic instructions. Types analysis only.
-  /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
-  /// arguments and the return value will be computed based on types.
-  /// I is the optional original context instruction holding the call to the
-  /// intrinsic
-  int getIntrinsicInstrCost(
-      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
-      unsigned ScalarizationCostPassed = UINT_MAX,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) const;
+  /// 3. scalar instruction which is to be vectorized.
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind) const;
 
   /// \returns The cost of Call instructions.
   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
@@ -1382,16 +1423,8 @@
   virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                      bool IsPairwiseForm, bool IsUnsigned,
                                      TTI::TargetCostKind CostKind) = 0;
-  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                    unsigned ScalarizationCostPassed,
-                                    TTI::TargetCostKind CostKind,
-                                    const Instruction *I) = 0;
-  virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                    ArrayRef<Value *> Args, FastMathFlags FMF,
-                                    unsigned VF,
-                                    TTI::TargetCostKind CostKind,
-                                    const Instruction *I) = 0;
+  virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                    TTI::TargetCostKind CostKind) = 0;
   virtual int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                                TTI::TargetCostKind CostKind) = 0;
@@ -1828,19 +1861,9 @@
     return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
                                        CostKind);
   }
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
-                            FastMathFlags FMF, unsigned ScalarizationCostPassed,
-                            TTI::TargetCostKind CostKind,
-                            const Instruction *I) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, CostKind, I);
-  }
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF,
-                            TTI::TargetCostKind CostKind,
-                            const Instruction *I) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind) override {
+    return Impl.getIntrinsicInstrCost(ICA, CostKind);
   }
   int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
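
With the declarations above, every intrinsic cost query funnels through a single
entry point: callers describe the call once in an IntrinsicCostAttributes object
instead of picking between the argument-based and type-based overloads. A minimal
usage sketch, not part of the patch; the TTI reference, Int32Ty, and the choice of
llvm.usub.sat are illustrative assumptions:

    // Sketch only: ask the target for the cost of an intrinsic we are
    // considering emitting. Assumes `TTI` (a TargetTransformInfo &) and
    // `Int32Ty` (Type *) are in scope.
    SmallVector<Type *, 2> ArgTys = {Int32Ty, Int32Ty};
    IntrinsicCostAttributes Attrs(Intrinsic::usub_sat, Int32Ty, ArgTys);
    int Cost = TTI.getIntrinsicInstrCost(
        Attrs, TargetTransformInfo::TCK_RecipThroughput);

Because no Value arguments are supplied, isTypeBasedOnly() is true and the query
takes the type-only path; passing the actual operands instead enables the
argument-aware costing.
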
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -464,17 +464,8 @@
     return 1;
   }
 
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                 unsigned ScalarizationCostPassed,
-                                 TTI::TargetCostKind CostKind,
-                                 const Instruction *I) {
-    return 1;
-  }
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF, TTI::TargetCostKind CostKind,
-                                 const Instruction *I) {
+  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                 TTI::TargetCostKind CostKind) {
     return 1;
   }
 
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1091,11 +1091,19 @@
   }
 
   /// Get intrinsic cost based on arguments.
-  unsigned getIntrinsicInstrCost(
-      Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args,
-      FastMathFlags FMF, unsigned VF = 1,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) {
+  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                 TTI::TargetCostKind CostKind) {
+
+    // TODO: Combine these two logic paths.
+    if (ICA.isTypeBasedOnly())
+      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+
+    Intrinsic::ID IID = ICA.getID();
+    const IntrinsicInst *I = ICA.getInst();
+    Type *RetTy = ICA.getReturnType();
+    const SmallVectorImpl<Value *> &Args = ICA.getArgs();
+    unsigned VF = ICA.getVectorFactor();
+    FastMathFlags FMF = ICA.getFlags();
     unsigned RetVF = (RetTy->isVectorTy() ?
                           cast<VectorType>(RetTy)->getNumElements() : 1);
@@ -1127,9 +1135,9 @@
         ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
       }
 
-      return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
-                                                ScalarizationCost, CostKind,
-                                                I);
+      IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF,
+                                    ScalarizationCost, I);
+      return ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
     }
     case Intrinsic::masked_scatter: {
       assert(VF == 1 && "Can't vectorize types here.");
@@ -1161,9 +1169,10 @@
     case Intrinsic::experimental_vector_reduce_fmax:
     case Intrinsic::experimental_vector_reduce_fmin:
     case Intrinsic::experimental_vector_reduce_umax:
-    case Intrinsic::experimental_vector_reduce_umin:
-      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF, 1,
-                                   CostKind, I);
+    case Intrinsic::experimental_vector_reduce_umin: {
+      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
+      return getIntrinsicInstrCost(Attrs, CostKind);
+    }
     case Intrinsic::fshl:
     case Intrinsic::fshr: {
       Value *X = Args[0];
@@ -1213,12 +1222,18 @@
   /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
   /// cost of scalarizing the arguments and the return value will be computed
   /// based on types.
-  unsigned getIntrinsicInstrCost(
-      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
-      unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max(),
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr) {
+  unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                          TTI::TargetCostKind CostKind) {
+    auto *ConcreteTTI = static_cast<T *>(this);
+
+    Intrinsic::ID IID = ICA.getID();
+    Type *RetTy = ICA.getReturnType();
+    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
+    FastMathFlags FMF = ICA.getFlags();
+    unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
+    bool SkipScalarizationCost = ICA.skipScalarizationCost();
+
     auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
 
     SmallVector<unsigned, 2> ISDs;
@@ -1230,7 +1245,7 @@
       unsigned ScalarCalls = 1;
       Type *ScalarRetTy = RetTy;
       if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
-        if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
+        if (!SkipScalarizationCost)
           ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
         ScalarCalls = std::max(ScalarCalls, RetVTy->getNumElements());
         ScalarRetTy = RetTy->getScalarType();
@@ -1239,7 +1254,7 @@
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         Type *Ty = Tys[i];
         if (auto *VTy = dyn_cast<VectorType>(Ty)) {
-          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
+          if (!SkipScalarizationCost)
            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
          ScalarCalls = std::max(ScalarCalls, VTy->getNumElements());
          Ty = Ty->getScalarType();
@@ -1249,9 +1264,9 @@
       if (ScalarCalls == 1)
         return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
 
+      IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
       unsigned ScalarCost =
-          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF,
-                                             CostKind);
+          ConcreteTTI->getIntrinsicInstrCost(ScalarAttrs, CostKind);
 
       return ScalarCalls * ScalarCost + ScalarizationCost;
     }
@@ -1397,9 +1412,9 @@
       // SatMax -> Overflow && SumDiff < 0
       // SatMin -> Overflow && SumDiff >= 0
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
-          CostKind);
+      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
+                                    ScalarizationCostPassed);
+      Cost += ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
                                               CondTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
@@ -1416,9 +1431,9 @@
                               : Intrinsic::usub_with_overflow;
 
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
-          CostKind);
+      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
+                                    ScalarizationCostPassed);
+      Cost += ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
                                               CondTy, CostKind);
       return Cost;
@@ -1592,10 +1607,9 @@
     // this will emit a costly libcall, adding call overhead and spills. Make it
     // very expensive.
     if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
-      unsigned ScalarizationCost =
-          ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
-               ? ScalarizationCostPassed
-               : getScalarizationOverhead(RetVTy, true, false));
+      unsigned ScalarizationCost = SkipScalarizationCost ?
+        ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
+
       unsigned ScalarCalls = RetVTy->getNumElements();
       SmallVector<Type *, 4> ScalarTys;
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
@@ -1604,11 +1618,11 @@
         Ty = Ty->getScalarType();
         ScalarTys.push_back(Ty);
       }
-      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
-          IID, RetTy->getScalarType(), ScalarTys, FMF, CostKind);
+      IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
+      unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(Attrs, CostKind);
       for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
         if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
-          if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
+          if (!ICA.skipScalarizationCost())
             ScalarizationCost += getScalarizationOverhead(VTy, false, true);
           ScalarCalls = std::max(ScalarCalls, VTy->getNumElements());
         }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -53,6 +53,83 @@
   return true;
 }
 
+IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
+    II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {
+
+  FunctionType *FTy = I.getCalledFunction()->getFunctionType();
+  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
+  Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());
+  if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
+    FMF = FPMO->getFastMathFlags();
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+                                                 unsigned Factor) :
+    RetTy(CI.getType()), IID(Id), VF(Factor) {
+
+  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
+    FMF = FPMO->getFastMathFlags();
+
+  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
+  FunctionType *FTy =
+      CI.getCalledFunction()->getFunctionType();
+  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI,
+                                                 unsigned Factor,
+                                                 unsigned ScalarCost) :
+    RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
+
+  if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
+    FMF = FPMO->getFastMathFlags();
+
+  Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
+  FunctionType *FTy =
+      CI.getCalledFunction()->getFunctionType();
+  ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                                 ArrayRef<Type *> Tys,
+                                                 FastMathFlags Flags) :
+    RetTy(RTy), IID(Id), FMF(Flags) {
+  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                                 ArrayRef<Type *> Tys,
+                                                 FastMathFlags Flags,
+                                                 unsigned ScalarCost) :
+    RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
+  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                                 ArrayRef<Type *> Tys,
+                                                 FastMathFlags Flags,
+                                                 unsigned ScalarCost,
+                                                 const IntrinsicInst *I) :
+    II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
+  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+                                                 ArrayRef<Type *> Tys) :
+    RetTy(RTy), IID(Id) {
+  ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
+}
+
+IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
+                                                 ArrayRef<Value *> Args) :
+    RetTy(Ty), IID(Id) {
+
+  Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
+  ParamTys.reserve(Arguments.size());
+  for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
+    ParamTys.push_back(Arguments[Idx]->getType());
+}
+
 bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
                                                LoopInfo &LI, DominatorTree &DT,
                                                bool ForceNestedLoop,
@@ -702,26 +779,10 @@
   return Cost;
 }
 
-int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Type *> Tys,
-                                               FastMathFlags FMF,
-                                               unsigned ScalarizationCostPassed,
-                                               TTI::TargetCostKind CostKind,
-                                               const Instruction *I) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                            ScalarizationCostPassed, CostKind,
-                                            I);
-  assert(Cost >= 0 && "TTI should not produce negative costs!");
-  return Cost;
-}
-
-int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Value *> Args,
-                                               FastMathFlags FMF, unsigned VF,
-                                               TTI::TargetCostKind CostKind,
-                                               const Instruction *I) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF,
-                                            CostKind, I);
+int
+TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                           TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntrinsicInstrCost(ICA, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1361,14 +1422,8 @@
   }
   case Instruction::Call:
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      SmallVector<Value *, 4> Args(II->arg_operands());
-
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
-        FMF = FPMO->getFastMathFlags();
-
-      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                   FMF, 1, CostKind, II);
+      IntrinsicCostAttributes CostAttrs(*II);
+      return getIntrinsicInstrCost(CostAttrs, CostKind);
     }
     return -1;
   default:
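
The constructors above show the two ways the attributes are populated: from an
existing call (copying the callee's parameter types, the actual operands, and any
fast-math flags) or from an intrinsic ID plus explicit types when no instruction
exists yet. A hedged sketch of the instruction-based path, mirroring the new
getUserCost code; the surrounding `I`, `TTI`, and the cost kind are assumptions
for illustration:

    // Sketch only: cost a call site that is already an IntrinsicInst.
    // Assumes `I` (an Instruction &) and `TTI` (a TargetTransformInfo &).
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      IntrinsicCostAttributes Attrs(*II); // captures ID, types, operands, FMF
      int Cost = TTI.getIntrinsicInstrCost(
          Attrs, TargetTransformInfo::TCK_SizeAndLatency);
      (void)Cost;
    }
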
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -232,20 +232,8 @@
       bool IsPairwise,
       TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
 
-  template <typename T>
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef<T *> Args,
-                            FastMathFlags FMF, unsigned VF,
-                            TTI::TargetCostKind CostKind,
-                            const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(
-      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
-      unsigned ScalarizationCostPassed = UINT_MAX,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(
-      Intrinsic::ID IID, Type *RetTy, ArrayRef<Value *> Args, FastMathFlags FMF,
-      unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
   int getMinMaxReductionCost(
     VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
     TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -558,18 +558,15 @@
   }
 }
 
-template <typename T>
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                      ArrayRef<T *> Args, FastMathFlags FMF,
-                                      unsigned VF,
-                                      TTI::TargetCostKind CostKind,
-                                      const Instruction *I) {
-  if (!intrinsicHasPackedVectorBenefit(ID))
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
+int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  if (!intrinsicHasPackedVectorBenefit(ICA.getID()))
+    return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 
+  Type *RetTy = ICA.getReturnType();
   EVT OrigTy = TLI->getValueType(DL, RetTy);
   if (!OrigTy.isSimple()) {
-    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
+    return BaseT::getIntrinsicInstrCost(ICA, CostKind);
   }
 
   // Legalize the type.
@@ -588,7 +585,7 @@
 
   // TODO: Get more refined intrinsic costs?
   unsigned InstRate = getQuarterRateInstrCost();
-  if (ID == Intrinsic::fma) {
+  if (ICA.getID() == Intrinsic::fma) {
     InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost()
                                    : getQuarterRateInstrCost();
   }
@@ -596,23 +593,6 @@
   return LT.first * NElts * InstRate;
 }
 
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                      ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF,
-                                      TTI::TargetCostKind CostKind,
-                                      const Instruction *I) {
-  return getIntrinsicInstrCost<Value>(ID, RetTy, Args, FMF, VF, CostKind, I);
-}
-
-int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                      ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                      unsigned ScalarizationCostPassed,
-                                      TTI::TargetCostKind CostKind,
-                                      const Instruction *I) {
-  return getIntrinsicInstrCost<Type>(ID, RetTy, Tys, FMF,
-                                     ScalarizationCostPassed, CostKind, I);
-}
-
 unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode,
                                     TTI::TargetCostKind CostKind) {
   // XXX - For some reason this isn't called for switch.
@@ -981,12 +961,8 @@
   }
   case Instruction::Call: {
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
-      SmallVector<Value *, 4> Args(II->arg_operands());
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
-        FMF = FPMO->getFastMathFlags();
-      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                   FMF, 1, CostKind, II);
+      IntrinsicCostAttributes CostAttrs(*II);
+      return getIntrinsicInstrCost(CostAttrs, CostKind);
     } else {
       return BaseT::getUserCost(U, Operands, CostKind);
     }
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -107,16 +107,8 @@
                                             unsigned VF);
   unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
                             TTI::TargetCostKind CostKind);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Value *> Args, FastMathFlags FMF,
-                                 unsigned VF,
-                                 TTI::TargetCostKind CostKind,
-                                 const Instruction *I);
-  unsigned getIntrinsicInstrCost(
-      Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
-      unsigned ScalarizationCostPassed = UINT_MAX,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr);
+  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                 TTI::TargetCostKind CostKind);
   unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE,
                                      const SCEV *S);
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -131,26 +131,14 @@
   return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind);
 }
 
-unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Value *> Args,
-                                               FastMathFlags FMF, unsigned VF,
-                                               TTI::TargetCostKind CostKind,
-                                               const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
-}
-
-unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                               ArrayRef<Type *> Tys,
-                                               FastMathFlags FMF,
-                                               unsigned ScalarizationCostPassed,
-                                               TTI::TargetCostKind CostKind,
-                                               const Instruction *I) {
-  if (ID == Intrinsic::bswap) {
-    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, RetTy);
+unsigned
+HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  if (ICA.getID() == Intrinsic::bswap) {
+    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, ICA.getReturnType());
     return LT.first + 2;
   }
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, CostKind, I);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
 unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp,
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -119,17 +119,8 @@
       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
-  unsigned getIntrinsicInstrCost(
-      Intrinsic::ID ID, Type *RetTy,
-      ArrayRef<Value *> Args, FastMathFlags FMF,
-      unsigned VF,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr);
-  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                 unsigned ScalarizationCostPassed = UINT_MAX,
-                                 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-                                 const Instruction *I = nullptr);
+  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                 TTI::TargetCostKind CostKind);
 
   /// @}
 };
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -953,24 +953,9 @@
   return Cost;
 }
 
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                           ArrayRef<Value *> Args,
-                                           FastMathFlags FMF, unsigned VF,
-                                           TTI::TargetCostKind CostKind,
-                                           const Instruction *I) {
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
-}
-
-unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                           ArrayRef<Type *> Tys,
-                                           FastMathFlags FMF,
-                                           unsigned ScalarizationCostPassed,
-                                           TTI::TargetCostKind CostKind,
-                                           const Instruction *I) {
-  if (ID == Intrinsic::bswap && ST->hasP9Vector())
-    return TLI->getTypeLegalizationCost(DL, RetTy).first;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, CostKind, I);
+unsigned PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                           TTI::TargetCostKind CostKind) {
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
 bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -110,16 +110,8 @@
       bool UseMaskForCond = false, bool UseMaskForGaps = false);
 
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1,
-                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
-                            const Instruction *I = nullptr);
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
-                            FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
-                            const Instruction *I = nullptr);
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
   /// @}
 };
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -1151,26 +1151,10 @@
   return -1;
 }
 
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Value *> Args,
-                                          FastMathFlags FMF, unsigned VF,
-                                          TTI::TargetCostKind CostKind,
-                                          const Instruction *I) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
+int SystemZTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                          TTI::TargetCostKind CostKind) {
+  int Cost = getVectorIntrinsicInstrCost(ICA.getID(), ICA.getReturnType());
   if (Cost != -1)
     return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
-}
-
-int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                                          ArrayRef<Type *> Tys,
-                                          FastMathFlags FMF,
-                                          unsigned ScalarizationCostPassed,
-                                          TTI::TargetCostKind CostKind,
-                                          const Instruction *I) {
-  int Cost = getVectorIntrinsicInstrCost(ID, RetTy);
-  if (Cost != -1)
-    return Cost;
-  return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, CostKind, I);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -153,17 +153,10 @@
   unsigned getAtomicMemIntrinsicMaxElementSize() const;
 
-  int getIntrinsicInstrCost(
-      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys,
-      FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX,
-      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-      const Instruction *I = nullptr);
-
-  int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef<Value *> Args, FastMathFlags FMF,
-                            unsigned VF = 1,
-                            TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
-                            const Instruction *I = nullptr);
+  int getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                     TTI::TargetCostKind CostKind);
+  int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                            TTI::TargetCostKind CostKind);
 
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                  bool IsPairwiseForm,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2231,11 +2231,9 @@
 
 unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; }
 
-int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Type *> Tys, FastMathFlags FMF,
-                                      unsigned ScalarizationCostPassed,
-                                      TTI::TargetCostKind CostKind,
-                                      const Instruction *I) {
+int X86TTIImpl::getTypeBasedIntrinsicInstrCost(
+    const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) {
+
   // Costs should match the codegen from:
   // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
   // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
@@ -2549,7 +2547,9 @@
     { ISD::UADDO,      MVT::i8,      1 },
   };
 
+  Type *RetTy = ICA.getReturnType();
   Type *OpTy = RetTy;
+  Intrinsic::ID IID = ICA.getID();
   unsigned ISD = ISD::DELETED_NODE;
   switch (IID) {
   default:
@@ -2694,15 +2694,14 @@
       return LT.first * Entry->Cost;
   }
 
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, CostKind, I);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
-int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                      ArrayRef<Value *> Args, FastMathFlags FMF,
-                                      unsigned VF,
-                                      TTI::TargetCostKind CostKind,
-                                      const Instruction *I) {
+int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+                                      TTI::TargetCostKind CostKind) {
+  if (ICA.isTypeBasedOnly())
+    return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
+
   static const CostTblEntry AVX512CostTbl[] = {
     { ISD::ROTL,       MVT::v8i64,   1 },
     { ISD::ROTL,       MVT::v4i64,   1 },
@@ -2753,6 +2752,9 @@
     { ISD::FSHL,       MVT::i8,      4 }
   };
 
+  Intrinsic::ID IID = ICA.getID();
+  Type *RetTy = ICA.getReturnType();
+  const SmallVectorImpl<Value *> &Args = ICA.getArgs();
   unsigned ISD = ISD::DELETED_NODE;
   switch (IID) {
   default:
@@ -2792,7 +2794,7 @@
       return LT.first * Entry->Cost;
   }
 
-  return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I);
+  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
 }
 
 int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3322,14 +3322,9 @@
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
   assert(ID && "Expected intrinsic call!");
 
-  FastMathFlags FMF;
-  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
-    FMF = FPMO->getFastMathFlags();
-
-  SmallVector<Value *, 4> Operands(CI->arg_operands());
-  return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF,
-                                   TargetTransformInfo::TCK_RecipThroughput,
-                                   CI);
+  IntrinsicCostAttributes CostAttrs(ID, *CI, VF);
+  return TTI.getIntrinsicInstrCost(CostAttrs,
+                                   TargetTransformInfo::TCK_RecipThroughput);
 }
 
 static Type *smallestIntegerVectorType(Type *T1, Type *T2) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3247,13 +3247,9 @@
   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
   // Calculate the cost of the scalar and vector calls.
-  FastMathFlags FMF;
-  if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
-    FMF = FPMO->getFastMathFlags();
-
-  SmallVector<Value *, 4> Args(CI->arg_operands());
-  int IntrinsicCost = TTI->getIntrinsicInstrCost(ID, CI->getType(), Args, FMF,
-                                                 VecTy->getNumElements());
+  IntrinsicCostAttributes CostAttrs(ID, *CI, VecTy->getNumElements());
+  int IntrinsicCost =
+      TTI->getIntrinsicInstrCost(CostAttrs, TTI::TCK_RecipThroughput);
   auto Shape = VFShape::get(*CI,
                             {static_cast<unsigned>(VecTy->getNumElements()),
                              false},
@@ -3584,16 +3580,8 @@
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
       // Calculate the cost of the scalar and vector calls.
-      SmallVector<Type *, 4> ScalarTys;
-      for (unsigned op = 0, opc = CI->getNumArgOperands(); op != opc; ++op)
-        ScalarTys.push_back(CI->getArgOperand(op)->getType());
-
-      FastMathFlags FMF;
-      if (auto *FPMO = dyn_cast<FPMathOperator>(CI))
-        FMF = FPMO->getFastMathFlags();
-
-      int ScalarEltCost =
-          TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind);
+      IntrinsicCostAttributes CostAttrs(ID, *CI, 1, 1);
+      int ScalarEltCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
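
In the vectorizer changes above, the Factor and ScalarCost constructor arguments
replace the old trailing VF and ScalarizationCostPassed parameters, so the same
call instruction can be costed as-is or as if widened. A rough, hedged sketch of
the kind of comparison the loop and SLP vectorizers make with the new API; the
variable names, the cost kind, and the profitability check are illustrative
assumptions, not code from this patch:

    // Sketch only: compare a scalar intrinsic call against one call widened by
    // VF lanes. Assumes `ID`, `CI` (CallInst *), `VF`, and `TTI`
    // (TargetTransformInfo *) are in scope.
    IntrinsicCostAttributes ScalarAttrs(ID, *CI, /*Factor=*/1);
    IntrinsicCostAttributes VectorAttrs(ID, *CI, /*Factor=*/VF);
    int ScalarCost = VF * TTI->getIntrinsicInstrCost(
                              ScalarAttrs, TargetTransformInfo::TCK_RecipThroughput);
    int VectorCost = TTI->getIntrinsicInstrCost(
        VectorAttrs, TargetTransformInfo::TCK_RecipThroughput);
    bool ProfitableToVectorize = VectorCost < ScalarCost;
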