diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -38,6 +38,7 @@ class BlockFrequencyInfo; class DominatorTree; class BranchInst; +class CallBase; class Function; class GlobalValue; class IntrinsicInst; @@ -120,10 +121,12 @@ public: IntrinsicCostAttributes(const IntrinsicInst &I); - IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI); + + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor); - IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, + IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI, unsigned Factor, unsigned ScalarCost); IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, @@ -141,7 +144,7 @@ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Tys); - IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty, + IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, ArrayRef Args); Intrinsic::ID getID() const { return IID; } @@ -288,18 +291,6 @@ /// scientific. A target may has no bonus on vector instructions. int getInlinerVectorBonusPercent() const; - /// Estimate the cost of an intrinsic when lowered. - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; - - /// Estimate the cost of an intrinsic when lowered. - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const; - /// \return the expected cost of a memcpy, which could e.g. depend on the /// source/destination type and alignment and the number of bytes copied. 
int getMemcpyCost(const Instruction *I) const; @@ -1231,13 +1222,6 @@ TTI::TargetCostKind CostKind) = 0; virtual unsigned getInliningThresholdMultiplier() = 0; virtual int getInlinerVectorBonusPercent() = 0; - virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - enum TargetCostKind CostKind) = 0; - virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U, - enum TargetCostKind CostKind) = 0; virtual int getMemcpyCost(const Instruction *I) = 0; virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, @@ -1495,18 +1479,6 @@ int getInlinerVectorBonusPercent() override { return Impl.getInlinerVectorBonusPercent(); } - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { - return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); - } - int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U = nullptr, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override { - return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - } int getMemcpyCost(const Instruction *I) override { return Impl.getMemcpyCost(I); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -462,6 +462,39 @@ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + switch (ICA.getID()) { + default: + break; + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::dbg_label: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case 
Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_param: + case Intrinsic::coro_subfn_addr: + // These intrinsics don't actually represent code after lowering. + return 0; + } return 1; } @@ -739,78 +772,32 @@ return TTI::TCC_Basic; } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - TTI::TargetCostKind TCK_SizeAndLatency) { - switch (IID) { - default: - // Intrinsics rarely (if ever) have normal argument setup constraints. - // Model them as having a basic instruction cost. - return TTI::TCC_Basic; - - // TODO: other libc intrinsics. 
- case Intrinsic::memcpy: - return static_cast(this)->getMemcpyCost(dyn_cast(U)); - - case Intrinsic::annotation: - case Intrinsic::assume: - case Intrinsic::sideeffect: - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::dbg_label: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::launder_invariant_group: - case Intrinsic::strip_invariant_group: - case Intrinsic::is_constant: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - case Intrinsic::experimental_gc_result: - case Intrinsic::experimental_gc_relocate: - case Intrinsic::coro_alloc: - case Intrinsic::coro_begin: - case Intrinsic::coro_free: - case Intrinsic::coro_end: - case Intrinsic::coro_frame: - case Intrinsic::coro_size: - case Intrinsic::coro_suspend: - case Intrinsic::coro_param: - case Intrinsic::coro_subfn_addr: - // These intrinsics don't actually represent code after lowering. - return TTI::TCC_Free; - } - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U, - TTI::TargetCostKind CostKind) { - // Delegate to the generic intrinsic handling code. This mostly provides an - // opportunity for targets to (for example) special case the cost of - // certain intrinsics based on constants used as arguments. - SmallVector ParamTys; - ParamTys.reserve(Arguments.size()); - for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) - ParamTys.push_back(Arguments[Idx]->getType()); - return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U, - CostKind); - } - - unsigned getUserCost(const User *U, ArrayRef Operands, - TTI::TargetCostKind CostKind) { + int getUserCost(const User *U, ArrayRef Operands, + TTI::TargetCostKind CostKind) { auto *TargetTTI = static_cast(this); - // FIXME: Unlikely to be true for anything but CodeSize. 
if (const auto *CB = dyn_cast(U)) { + // Special-case throughput here because it can make wild differences + // whether the arguments are passed around or just the arg types. The + // IntrinsicCostAttribute constructor used here will save all the + // available information. + // FIXME: More information isn't always useful and we shouldn't have to + // make a special case like this. + if (CostKind == TTI::TCK_RecipThroughput) { + if (auto *II = dyn_cast(CB)) { + IntrinsicCostAttributes Attrs(*II); + return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind); + } + return -1; // We know nothing about this call. + } + + // FIXME: Unlikely to be true for anything but CodeSize. const Function *F = CB->getCalledFunction(); if (F) { FunctionType *FTy = F->getFunctionType(); if (Intrinsic::ID IID = F->getIntrinsicID()) { - SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); - return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(), - ParamTys, U, CostKind); + IntrinsicCostAttributes Attrs(IID, *CB); + return TargetTTI->getIntrinsicInstrCost(Attrs, CostKind); } if (!TargetTTI->isLoweredToCall(F)) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -296,30 +296,6 @@ return BaseT::getGEPCost(PointeeType, Ptr, Operands); } - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, const User *U, - TTI::TargetCostKind CostKind) { - return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - } - - unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef ParamTys, const User *U, - TTI::TargetCostKind CostKind) { - if (IID == Intrinsic::cttz) { - if (getTLI()->isCheapToSpeculateCttz()) - return TargetTransformInfo::TCC_Basic; - return TargetTransformInfo::TCC_Expensive; - } - - if (IID == Intrinsic::ctlz) { - if (getTLI()->isCheapToSpeculateCtlz()) - return 
TargetTransformInfo::TCC_Basic; - return TargetTransformInfo::TCC_Expensive; - } - - return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind); - } - unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, @@ -1091,21 +1067,40 @@ unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + Intrinsic::ID IID = ICA.getID(); + auto *ConcreteTTI = static_cast(this); + + // Special case some scalar intrinsics. + if (CostKind != TTI::TCK_RecipThroughput) { + switch (IID) { + default: + break; + case Intrinsic::cttz: + if (getTLI()->isCheapToSpeculateCttz()) + return TargetTransformInfo::TCC_Basic; + break; + case Intrinsic::ctlz: + if (getTLI()->isCheapToSpeculateCtlz()) + return TargetTransformInfo::TCC_Basic; + break; + case Intrinsic::memcpy: + return ConcreteTTI->getMemcpyCost(ICA.getInst()); + } + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + } + // TODO: Combine these two logic paths. if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); - Intrinsic::ID IID = ICA.getID(); const IntrinsicInst *I = ICA.getInst(); - Type *RetTy = ICA.getReturnType(); const SmallVectorImpl &Args = ICA.getArgs(); - unsigned VF = ICA.getVectorFactor(); FastMathFlags FMF = ICA.getFlags(); - + Type *RetTy = ICA.getReturnType(); + unsigned VF = ICA.getVectorFactor(); unsigned RetVF = (RetTy->isVectorTy() ? 
cast(RetTy)->getNumElements() : 1); assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); - auto *ConcreteTTI = static_cast(this); switch (IID) { default: { @@ -1592,13 +1587,14 @@ CostKind) + ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy, CostKind); - if (IID == Intrinsic::experimental_constrained_fmuladd) - return ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr, - CostKind) + - ConcreteTTI->getIntrinsicCost( - Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr, - CostKind); + if (IID == Intrinsic::experimental_constrained_fmuladd) { + IntrinsicCostAttributes FMulAttrs( + Intrinsic::experimental_constrained_fmul, RetTy, Tys); + IntrinsicCostAttributes FAddAttrs( + Intrinsic::experimental_constrained_fadd, RetTy, Tys); + return ConcreteTTI->getIntrinsicInstrCost(FMulAttrs, CostKind) + + ConcreteTTI->getIntrinsicInstrCost(FAddAttrs, CostKind); + } // Else, assume that we need to scalarize this intrinsic. For math builtins // this will emit a costly libcall, adding call overhead and spills. 
Make it diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -63,7 +63,20 @@ FMF = FPMO->getFastMathFlags(); } -IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI) : + II(dyn_cast(&CI)), RetTy(CI.getType()), IID(Id) { + + if (auto *FPMO = dyn_cast(&CI)) + FMF = FPMO->getFastMathFlags(); + + FunctionType *FTy = + CI.getCalledFunction()->getFunctionType(); + ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); +} + +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, unsigned Factor) : RetTy(CI.getType()), IID(Id), VF(Factor) { @@ -76,7 +89,8 @@ ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end()); } -IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, CallInst &CI, +IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, + const CallBase &CI, unsigned Factor, unsigned ScalarCost) : RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) { @@ -236,15 +250,6 @@ return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind); } -int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Arguments, - const User *U, - TTI::TargetCostKind CostKind) const { - int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind); - assert(Cost >= 0 && "TTI should not produce negative costs!"); - return Cost; -} - unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters( const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const { @@ -1416,11 +1421,7 @@ return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr); } case Instruction::Call: - if (const IntrinsicInst *II = dyn_cast(I)) { - IntrinsicCostAttributes 
CostAttrs(*II); - return getIntrinsicInstrCost(CostAttrs, CostKind); - } - return -1; + return getUserCost(I, CostKind); default: // We don't have any information on this instruction. return -1; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -560,6 +560,9 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + if (ICA.getID() == Intrinsic::fabs) + return 0; + if (!intrinsicHasPackedVectorBenefit(ICA.getID())) return BaseT::getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2699,6 +2699,9 @@ int X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + if (CostKind != TTI::TCK_RecipThroughput) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + if (ICA.isTypeBasedOnly()) return getTypeBasedIntrinsicInstrCost(ICA, CostKind); @@ -3932,6 +3935,9 @@ unsigned Alignment, TTI::TargetCostKind CostKind, const Instruction *I = nullptr) { + if (CostKind != TTI::TCK_RecipThroughput) + return 1; + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); unsigned VF = cast(SrcVTy)->getNumElements(); PointerType *PtrTy = dyn_cast(Ptr->getType()); diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1535,7 +1535,7 @@ // %inc = add nsw %i.0, 1 // br i1 %tobool - const Value *Args[] = + Value *Args[] = {InitX, ZeroCheck ?
ConstantInt::getTrue(InitX->getContext()) : ConstantInt::getFalse(InitX->getContext())}; @@ -1544,9 +1544,11 @@ uint32_t HeaderSize = std::distance(InstWithoutDebugIt.begin(), InstWithoutDebugIt.end()); + IntrinsicCostAttributes Attrs(IntrinID, InitX->getType(), Args); + int Cost = + TTI->getIntrinsicInstrCost(Attrs, TargetTransformInfo::TCK_SizeAndLatency); if (HeaderSize != IdiomCanonicalSize && - TTI->getIntrinsicCost(IntrinID, InitX->getType(), Args) > - TargetTransformInfo::TCC_Basic) + Cost > TargetTransformInfo::TCC_Basic) return false; transformLoopToCountable(IntrinID, PH, CntInst, CntPhi, InitX, DefX,