diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -105,6 +105,9 @@
   bool canAnalyze(LoopInfo &LI);
 };
 
+class TargetTransformInfo;
+typedef TargetTransformInfo TTI;
+
 /// This pass provides access to the codegen interfaces that are needed
 /// for IR-level transformations.
 class TargetTransformInfo {
@@ -205,7 +208,8 @@
   /// Estimate the cost of a GEP operation when lowered.
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef Operands) const;
+                 ArrayRef Operands,
+                 TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of a EXT operation when lowered.
   int getExtCost(const Instruction *I, const Value *Src) const;
@@ -233,12 +237,14 @@
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef ParamTys,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// Estimate the cost of an intrinsic when lowered.
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef Arguments,
-                       const User *U = nullptr) const;
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TCK_SizeAndLatency) const;
 
   /// \return the expected cost of a memcpy, which could e.g. depend on the
   /// source/destination type and alignment and the number of bytes copied.
@@ -702,15 +708,15 @@
   /// Return the expected cost of materializing for the given integer
   /// immediate of the specified type.
-  int getIntImmCost(const APInt &Imm, Type *Ty) const;
+  int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost of materialization for the given integer
   /// immediate of the specified type for a given instruction. The cost can be
   /// zero if the immediate can be folded into the specified instruction.
   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                        Type *Ty) const;
+                        Type *Ty, TargetCostKind CostKind) const;
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) const;
+                          Type *Ty, TargetCostKind CostKind) const;
 
   /// Return the expected cost for the given integer when optimising
   /// for size. This is different than the other integer immediate cost
@@ -876,7 +882,9 @@
   /// \p CxtI is the optional original context instruction, if one exists, to
   /// provide even more information.
   int getArithmeticInstrCost(
-      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+      unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      OperandValueKind Opd1Info = OK_AnyValue,
       OperandValueKind Opd2Info = OK_AnyValue,
       OperandValueProperties Opd1PropInfo = OP_None,
       OperandValueProperties Opd2PropInfo = OP_None,
@@ -895,6 +903,7 @@
   /// zext, etc. If there is an existing instruction that holds Opcode, it
   /// may be passed in the 'I' parameter.
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
                        const Instruction *I = nullptr) const;
 
   /// \return The expected cost of a sign- or zero-extended vector extract. Use
@@ -904,12 +913,14 @@
   /// \return The expected cost of control-flow related instructions such as
   /// Phi, Ret, Br.
-  int getCFInstrCost(unsigned Opcode) const;
+  int getCFInstrCost(unsigned Opcode,
+                     TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The expected cost of compare and select instructions. If there
   /// is an existing instruction that holds Opcode, it may be passed in the
   /// 'I' parameter.
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+                         TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                          const Instruction *I = nullptr) const;
 
   /// \return The expected cost of vector Insert and Extract.
@@ -919,11 +930,13 @@
   /// \return The cost of Load and Store instructions.
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                       unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
                       const Instruction *I = nullptr) const;
 
   /// \return The cost of masked Load and Store instructions.
-  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                            unsigned AddressSpace) const;
+  int getMaskedMemoryOpCost(
+      unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \return The cost of Gather or Scatter operation
   /// \p Opcode - is a type of memory access Load or Store
@@ -934,9 +947,10 @@
   /// \p Alignment - alignment of single element
   /// \p I - the optional original context instruction, if one exists, e.g. the
   /// load/store to transform or the call to the gather/scatter intrinsic
-  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                             bool VariableMask, unsigned Alignment,
-                             const Instruction *I = nullptr) const;
+  int getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \return The cost of the interleaved memory operation.
   /// \p Opcode is the memory operation code
@@ -948,11 +962,11 @@
   /// \p AddressSpace is address space of the pointer.
   /// \p UseMaskForCond indicates if the memory access is predicated.
   /// \p UseMaskForGaps indicates if gaps should be masked.
-  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
-                                 ArrayRef Indices, unsigned Alignment,
-                                 unsigned AddressSpace,
-                                 bool UseMaskForCond = false,
-                                 bool UseMaskForGaps = false) const;
+  int getInterleavedMemoryOpCost(
+      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices,
+      unsigned Alignment, unsigned AddressSpace,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
 
   /// Calculate the cost of performing a vector reduction.
   ///
@@ -967,33 +981,39 @@
   /// Split:
   /// (v0, v1, v2, v3)
   /// ((v0+v2), (v1+v3), undef, undef)
-  int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm) const;
-  int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned) const;
+  int getArithmeticReductionCost(
+      unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
+  int getMinMaxReductionCost(
+      VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
 
   /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
   /// Three cases are handled: 1. scalar instruction 2. vector instruction
   /// 3. scalar instruction which is to be vectorized with VF.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
-                            ArrayRef Args, FastMathFlags FMF,
-                            unsigned VF = 1,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef Args,
+      FastMathFlags FMF, unsigned VF = 1,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Intrinsic instructions. Types analysis only.
   /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
   /// arguments and the return value will be computed based on types.
   /// I is the optional original context instruction holding the call to the
   /// intrinsic
-  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys,
-                            FastMathFlags FMF,
-                            unsigned ScalarizationCostPassed = UINT_MAX,
-                            const Instruction *I = nullptr) const;
+  int getIntrinsicInstrCost(
+      Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) const;
 
   /// \returns The cost of Call instructions.
-  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) const;
+  int getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
 
   /// \returns The number of pieces into which the provided type must be
   /// split during legalization. Zero is returned when the answer is unknown.
@@ -1166,15 +1186,18 @@
   virtual ~Concept() = 0;
   virtual const DataLayout &getDataLayout() const = 0;
   virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
-                         ArrayRef Operands) = 0;
+                         ArrayRef Operands,
+                         TTI::TargetCostKind CostKind) = 0;
   virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
   virtual unsigned getInliningThresholdMultiplier() = 0;
   virtual int getInlinerVectorBonusPercent() = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                               ArrayRef ParamTys, const User *U) = 0;
+                               ArrayRef ParamTys, const User *U,
+                               enum TargetCostKind CostKind) = 0;
   virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                ArrayRef Arguments,
-                               const User *U) = 0;
+                               const User *U,
+                               enum TargetCostKind CostKind) = 0;
   virtual int getMemcpyCost(const Instruction *I) = 0;
   virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                     unsigned &JTSize,
@@ -1261,11 +1284,13 @@
   virtual int getFPOpCost(Type *Ty) = 0;
   virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                     const APInt &Imm, Type *Ty) = 0;
-  virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
+  virtual int getIntImmCost(const APInt &Imm, Type *Ty,
+                            TargetCostKind CostKind) = 0;
   virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                                Type *Ty) = 0;
+                                Type *Ty, TargetCostKind CostKind) = 0;
   virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                  const APInt &Imm, Type *Ty) = 0;
+                                  const APInt &Imm, Type *Ty,
+                                  TargetCostKind CostKind) = 0;
   virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
   virtual unsigned getRegisterClassForType(bool Vector,
                                            Type *Ty = nullptr) const = 0;
@@ -1306,47 +1331,65 @@
   virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
   virtual unsigned getArithmeticInstrCost(
-      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+      unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind,
+      OperandValueKind Opd1Info,
       OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
       OperandValueProperties Opd2PropInfo, ArrayRef Args,
       const Instruction *CxtI = nullptr) = 0;
   virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
                              VectorType *SubTp) = 0;
   virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                               TTI::TargetCostKind CostKind,
                                const Instruction *I) = 0;
   virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                        VectorType *VecTy, unsigned Index) = 0;
-  virtual int getCFInstrCost(unsigned Opcode) = 0;
+  virtual int getCFInstrCost(unsigned Opcode,
+                             TTI::TargetCostKind CostKind) = 0;
   virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) = 0;
   virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
                                  unsigned Index) = 0;
   virtual int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                              unsigned AddressSpace, const Instruction *I) = 0;
+                              unsigned AddressSpace,
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) = 0;
   virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                     unsigned Alignment,
-                                    unsigned AddressSpace) = 0;
-  virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                     bool VariableMask, unsigned Alignment,
-                                     const Instruction *I = nullptr) = 0;
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind) = 0;
+  virtual int getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr) = 0;
+
   virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                          unsigned Factor, ArrayRef Indices,
                                          unsigned Alignment,
-                                         unsigned AddressSpace, bool UseMaskForCond = false,
+                                         unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
+                                         bool UseMaskForCond = false,
                                          bool UseMaskForGaps = false) = 0;
   virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                         bool IsPairwiseForm) = 0;
+                                         bool IsPairwiseForm,
+                                         TTI::TargetCostKind CostKind) = 0;
   virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                     bool IsPairwiseForm, bool IsUnsigned) = 0;
+                                     bool IsPairwiseForm, bool IsUnsigned,
+                                     TTI::TargetCostKind CostKind) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                     ArrayRef Tys, FastMathFlags FMF,
                                     unsigned ScalarizationCostPassed,
+                                    TTI::TargetCostKind CostKind,
                                     const Instruction *I) = 0;
   virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                     ArrayRef Args, FastMathFlags FMF,
-                                    unsigned VF, const Instruction *I) = 0;
+                                    unsigned VF,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I) = 0;
   virtual int getCallInstrCost(Function *F, Type *RetTy,
-                               ArrayRef Tys) = 0;
+                               ArrayRef Tys,
+                               TTI::TargetCostKind CostKind) = 0;
   virtual unsigned getNumberOfParts(Type *Tp) = 0;
   virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                         const SCEV *Ptr) = 0;
@@ -1408,7 +1451,8 @@
   }
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef Operands) override {
+                 ArrayRef Operands,
+                 enum TargetTransformInfo::TargetCostKind CostKind) override {
     return Impl.getGEPCost(PointeeType, Ptr, Operands);
   }
   int getExtCost(const Instruction *I, const Value *Src) override {
@@ -1422,13 +1466,15 @@
   }
   int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                        ArrayRef ParamTys,
-                       const User *U = nullptr) override {
-    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+    return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
   }
   int getIntrinsicCost(Intrinsic::ID IID,
                        Type *RetTy, ArrayRef Arguments,
-                       const User *U = nullptr) override {
-    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+                       const User *U = nullptr,
+                       TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) override {
+    return Impl.getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
   }
   int getMemcpyCost(const Instruction *I) override {
     return Impl.getMemcpyCost(I);
@@ -1613,16 +1659,17 @@
                             Type *Ty) override {
     return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
   }
-  int getIntImmCost(const APInt &Imm, Type *Ty) override {
-    return Impl.getIntImmCost(Imm, Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty,
+                    TargetCostKind CostKind) override {
+    return Impl.getIntImmCost(Imm, Ty, CostKind);
   }
   int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
-                        Type *Ty) override {
-    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty);
+                        Type *Ty, TargetCostKind CostKind) override {
+    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind);
  }
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty) override {
-    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty);
+                          Type *Ty, TargetCostKind CostKind) override {
+    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   }
   unsigned getNumberOfRegisters(unsigned ClassID) const override {
     return Impl.getNumberOfRegisters(ClassID);
@@ -1698,13 +1745,14 @@
     return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
   }
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  TTI::TargetCostKind CostKind,
                                   OperandValueKind Opd1Info,
                                   OperandValueKind Opd2Info,
                                   OperandValueProperties Opd1PropInfo,
                                   OperandValueProperties Opd2PropInfo,
                                   ArrayRef Args,
                                   const Instruction *CxtI = nullptr) override {
-    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                        Opd1PropInfo, Opd2PropInfo, Args, CxtI);
   }
   int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
@@ -1712,67 +1760,84 @@
     return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
   }
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I) override {
-    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
+    return Impl.getCastInstrCost(Opcode, Dst, Src, CostKind, I);
   }
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                                unsigned Index) override {
     return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
   }
-  int getCFInstrCost(unsigned Opcode) override {
-    return Impl.getCFInstrCost(Opcode);
+  int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
+    return Impl.getCFInstrCost(Opcode, CostKind);
   }
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I) override {
-    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   }
   int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
     return Impl.getVectorInstrCost(Opcode, Val, Index);
   }
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I) override {
-    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+                      unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                      const Instruction *I) override {
+    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                CostKind, I);
   }
   int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                            unsigned AddressSpace) override {
-    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
-  }
-  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                             bool VariableMask, unsigned Alignment,
-                             const Instruction *I = nullptr) override {
+                            unsigned AddressSpace,
+                            TTI::TargetCostKind CostKind) override {
+    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind);
+  }
+  int getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr) override {
     return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                       Alignment, I);
+                                       Alignment, CostKind, I);
   }
   int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                  ArrayRef Indices, unsigned Alignment,
-                                 unsigned AddressSpace, bool UseMaskForCond,
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind,
+                                 bool UseMaskForCond,
                                  bool UseMaskForGaps) override {
     return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
-                                           Alignment, AddressSpace,
+                                           Alignment, AddressSpace, CostKind,
                                            UseMaskForCond, UseMaskForGaps);
   }
   int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                 bool IsPairwiseForm) override {
-    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                 bool IsPairwiseForm,
+                                 TTI::TargetCostKind CostKind) override {
+    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                           CostKind);
   }
   int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                             bool IsPairwiseForm, bool IsUnsigned) override {
-    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+                             bool IsPairwiseForm, bool IsUnsigned,
+                             TTI::TargetCostKind CostKind) override {
+    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                       CostKind);
   }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys,
                             FastMathFlags FMF,
                             unsigned ScalarizationCostPassed,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I) override {
     return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                      ScalarizationCostPassed, I);
+                                      ScalarizationCostPassed, CostKind, I);
   }
   int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                             ArrayRef Args, FastMathFlags FMF,
-                            unsigned VF, const Instruction *I) override {
-    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+                            unsigned VF,
+                            TTI::TargetCostKind CostKind,
+                            const Instruction *I) override {
+    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I);
   }
   int getCallInstrCost(Function *F, Type *RetTy,
-                       ArrayRef Tys) override {
-    return Impl.getCallInstrCost(F, RetTy, Tys);
+                       ArrayRef Tys,
+                       TTI::TargetCostKind CostKind) override {
+    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
   }
   unsigned getNumberOfParts(Type *Tp) override {
     return Impl.getNumberOfParts(Tp);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -44,7 +44,8 @@
   const DataLayout &getDataLayout() const { return DL; }
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef Operands) {
+                 ArrayRef Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     // In the basic model, we just assume that all-constant GEPs will be folded
     // into their uses via addressing modes.
     for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
@@ -285,15 +286,19 @@
     return 0;
   }
 
-  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
+  unsigned getIntImmCost(const APInt &Imm, Type *Ty,
+                         TTI::TargetCostKind CostKind) {
+    return TTI::TCC_Basic;
+  }
 
   unsigned getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                             Type *Ty) {
+                             Type *Ty, TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
 
   unsigned getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                               const APInt &Imm, Type *Ty) {
+                               const APInt &Imm, Type *Ty,
+                               TTI::TargetCostKind CostKind) {
     return TTI::TCC_Free;
   }
@@ -366,6 +371,7 @@
   unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
 
   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+                                  TTI::TargetCostKind CostKind,
                                   TTI::OperandValueKind Opd1Info,
                                   TTI::OperandValueKind Opd2Info,
                                   TTI::OperandValueProperties Opd1PropInfo,
@@ -381,6 +387,7 @@
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I) {
     switch (Opcode) {
     default:
@@ -419,10 +426,12 @@
     return 1;
   }
 
-  unsigned getCFInstrCost(unsigned Opcode) { return 1; }
+  unsigned getCFInstrCost(unsigned Opcode,
+                          TTI::TargetCostKind CostKind) { return 1; }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I) {
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I) const {
     return 1;
   }
@@ -431,18 +440,21 @@
   }
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                           unsigned AddressSpace, const Instruction *I) {
+                           unsigned AddressSpace, TTI::TargetCostKind CostKind,
+                           const Instruction *I) const {
     return 1;
   }
 
   unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
-                                 unsigned AddressSpace) {
+                                 unsigned AddressSpace,
+                                 TTI::TargetCostKind CostKind) {
     return 1;
   }
 
-  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
-                                  bool VariableMask, unsigned Alignment,
-                                  const Instruction *I = nullptr) {
+  unsigned getGatherScatterOpCost(
+      unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+      unsigned Alignment, TTI::TargetCostKind CostKind,
+      const Instruction *I = nullptr) {
     return 1;
   }
@@ -450,24 +462,28 @@
                                          unsigned Factor,
                                          ArrayRef Indices,
                                          unsigned Alignment, unsigned AddressSpace,
-                                         bool UseMaskForCond = false,
-                                         bool UseMaskForGaps = false) {
+                                         TTI::TargetCostKind CostKind,
+                                         bool UseMaskForCond,
+                                         bool UseMaskForGaps) {
     return 1;
   }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef Tys, FastMathFlags FMF,
                                  unsigned ScalarizationCostPassed,
+                                 TTI::TargetCostKind CostKind,
                                  const Instruction *I) {
     return 1;
   }
 
   unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                  ArrayRef Args, FastMathFlags FMF,
-                                 unsigned VF, const Instruction *I) {
+                                 unsigned VF, TTI::TargetCostKind CostKind,
+                                 const Instruction *I) {
     return 1;
   }
 
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys,
+                            TTI::TargetCostKind CostKind) {
     return 1;
   }
@@ -478,9 +494,11 @@
     return 0;
   }
 
-  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool) { return 1; }
+  unsigned getArithmeticReductionCost(unsigned, VectorType *, bool,
+                                      TTI::TargetCostKind) { return 1; }
 
-  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool) { return 1; }
+  unsigned getMinMaxReductionCost(VectorType *, VectorType *, bool, bool,
+                                  TTI::TargetCostKind) { return 1; }
 
   unsigned getCostOfKeepingLiveOverCall(ArrayRef Tys) { return 0; }
@@ -680,7 +698,8 @@
   using BaseT::getGEPCost;
 
   int getGEPCost(Type *PointeeType, const Value *Ptr,
-                 ArrayRef Operands) {
+                 ArrayRef Operands,
+                 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
     // TODO: will remove this when pointers have an opaque type.
     assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
@@ -738,7 +757,8 @@
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef ParamTys, const User *U) {
+                            ArrayRef ParamTys, const User *U,
+                            TTI::TargetCostKind TCK_SizeAndLatency) {
     switch (IID) {
     default:
       // Intrinsics rarely (if ever) have normal argument setup constraints.
@@ -782,7 +802,8 @@
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef Arguments, const User *U) {
+                            ArrayRef Arguments, const User *U,
+                            TTI::TargetCostKind CostKind) {
     // Delegate to the generic intrinsic handling code. This mostly provides an
     // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
@@ -790,11 +811,12 @@
     ParamTys.reserve(Arguments.size());
     for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
       ParamTys.push_back(Arguments[Idx]->getType());
-    return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
+    return static_cast(this)->getIntrinsicCost(IID, RetTy, ParamTys, U,
+                                                CostKind);
   }
 
   unsigned getUserCost(const User *U, ArrayRef Operands,
-                       enum TTI::TargetCostKind CostKind) {
+                       TTI::TargetCostKind CostKind) {
     auto *TargetTTI = static_cast(this);
 
     // FIXME: Unlikely to be true for anything but CodeSize.
@@ -805,7 +827,7 @@
       if (Intrinsic::ID IID = F->getIntrinsicID()) {
         SmallVector ParamTys(FTy->param_begin(), FTy->param_end());
         return TargetTTI->getIntrinsicCost(IID, FTy->getReturnType(),
-                                           ParamTys, U);
+                                           ParamTys, U, CostKind);
       }
 
       if (!TargetTTI->isLoweredToCall(F))
@@ -849,12 +871,12 @@
     case Instruction::IntToPtr:
     case Instruction::PtrToInt:
     case Instruction::Trunc:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free ||
-          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free ||
+          TargetTTI->getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::BitCast:
-      if (getCastInstrCost(Opcode, Ty, OpTy, I) == TTI::TCC_Free)
+      if (getCastInstrCost(Opcode, Ty, OpTy, CostKind, I) == TTI::TCC_Free)
         return TTI::TCC_Free;
       break;
     case Instruction::FPExt:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -305,12 +305,14 @@
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef Arguments, const User *U) {
-    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
+                            ArrayRef Arguments, const User *U,
+                            TTI::TargetCostKind CostKind) {
+    return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
   }
 
   unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
-                            ArrayRef ParamTys, const User *U) {
+                            ArrayRef ParamTys, const User *U,
+                            TTI::TargetCostKind CostKind) {
     if (IID == Intrinsic::cttz) {
       if (getTLI()->isCheapToSpeculateCttz())
         return TargetTransformInfo::TCC_Basic;
@@ -323,7 +325,7 @@
       return TargetTransformInfo::TCC_Expensive;
     }
 
-    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
+    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U, CostKind);
   }
 
   unsigned
   getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -625,6 +627,7 @@
 
   unsigned getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -661,7 +664,7 @@
     if (auto *VTy = dyn_cast(Ty)) {
       unsigned Num = VTy->getNumElements();
       unsigned Cost = static_cast(this)->getArithmeticInstrCost(
-          Opcode, VTy->getScalarType());
+          Opcode, VTy->getScalarType(), CostKind);
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
       return getScalarizationOverhead(VTy, Args) + Num * Cost;
@@ -691,6 +694,7 @@
   }
 
   unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                            TTI::TargetCostKind CostKind,
                             const Instruction *I = nullptr) {
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
@@ -802,14 +806,16 @@
       unsigned SplitCost =
           (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
       return SplitCost +
-             (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, I));
+             (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy,
+                                        CostKind, I));
     }
 
     // In other cases where the source or destination are illegal, assume
     // the operation will get scalarized.
     unsigned Num = DstVTy->getNumElements();
     unsigned Cost = static_cast(this)->getCastInstrCost(
-        Opcode, Dst->getScalarType(), Src->getScalarType(), I);
+        Opcode, Dst->getScalarType(), Src->getScalarType(),
+        CostKind, I);
 
     // Return the cost of multiple scalar invocation plus the cost of
     // inserting and extracting the values.
@@ -835,16 +841,18 @@
     return static_cast(this)->getVectorInstrCost(
                Instruction::ExtractElement, VecTy, Index) +
            static_cast(this)->getCastInstrCost(Opcode, Dst,
-                                                VecTy->getElementType());
+                                                VecTy->getElementType(),
+                                                TTI::TCK_RecipThroughput);
   }
 
-  unsigned getCFInstrCost(unsigned Opcode) {
+  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
     // Branches are assumed to be predicted.
     return 0;
   }
 
   unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
-                              const Instruction *I) {
+                              TTI::TargetCostKind CostKind,
+                              const Instruction *I = nullptr) {
     const TargetLoweringBase *TLI = getTLI();
     int ISD = TLI->InstructionOpcodeToISD(Opcode);
     assert(ISD && "Invalid opcode");
@@ -872,7 +880,7 @@
       if (CondTy)
         CondTy = CondTy->getScalarType();
       unsigned Cost = static_cast(this)->getCmpSelInstrCost(
-          Opcode, ValVTy->getScalarType(), CondTy, I);
+          Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
 
       // Return the cost of multiple scalar invocation plus the cost of
       // inserting and extracting the values.
@@ -892,6 +900,7 @@
 
   unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                            unsigned AddressSpace,
+                           TTI::TargetCostKind CostKind,
                            const Instruction *I = nullptr) {
     assert(!Src->isVoidTy() && "Invalid type");
     std::pair LT = getTLI()->getTypeLegalizationCost(DL, Src);
@@ -926,6 +935,7 @@
                                          unsigned Factor,
                                          ArrayRef Indices,
                                          unsigned Alignment, unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          bool UseMaskForCond = false,
                                          bool UseMaskForGaps = false) {
     auto *VT = cast(VecTy);
@@ -940,10 +950,10 @@
     unsigned Cost;
     if (UseMaskForCond || UseMaskForGaps)
       Cost = static_cast(this)->getMaskedMemoryOpCost(
-          Opcode, VecTy, Alignment, AddressSpace);
+          Opcode, VecTy, Alignment, AddressSpace, CostKind);
     else
       Cost = static_cast(this)->getMemoryOpCost(
-          Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
+          Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind);
 
     // Legalize the vector type, and get the legalized and unlegalized type
     // sizes.
@@ -1071,16 +1081,18 @@
     // inside the loop.
     if (UseMaskForGaps)
       Cost += static_cast(this)->getArithmeticInstrCost(
-          BinaryOperator::And, MaskVT);
+          BinaryOperator::And, MaskVT, CostKind);
 
     return Cost;
   }
 
   /// Get intrinsic cost based on arguments.
-  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
-                                 ArrayRef Args, FastMathFlags FMF,
-                                 unsigned VF = 1,
-                                 const Instruction *I = nullptr) {
+  unsigned getIntrinsicInstrCost(
+      Intrinsic::ID IID, Type *RetTy, ArrayRef Args,
+      FastMathFlags FMF, unsigned VF = 1,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+      const Instruction *I = nullptr) {
+
     unsigned RetVF =
         (RetTy->isVectorTy() ? cast(RetTy)->getNumElements() : 1);
     assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
@@ -1111,7 +1123,8 @@
       }
 
       return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
-                                                ScalarizationCost);
+                                                ScalarizationCost, CostKind,
+                                                I);
     }
     case Intrinsic::masked_scatter: {
       assert(VF == 1 && "Can't vectorize types here.");
@@ -1120,7 +1133,8 @@
       unsigned Alignment = cast(Args[2])->getZExtValue();
       return ConcreteTTI->getGatherScatterOpCost(Instruction::Store,
                                                  Args[0]->getType(), Args[1],
-                                                 VarMask, Alignment, I);
+                                                 VarMask, Alignment, CostKind,
+                                                 I);
     }
     case Intrinsic::masked_gather: {
       assert(VF == 1 && "Can't vectorize types here.");
@@ -1128,7 +1142,7 @@
       bool VarMask = !isa(Mask);
       unsigned Alignment = cast(Args[1])->getZExtValue();
       return ConcreteTTI->getGatherScatterOpCost(
-          Instruction::Load, RetTy, Args[0], VarMask, Alignment, I);
+          Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I);
     }
     case Intrinsic::experimental_vector_reduce_add:
     case Intrinsic::experimental_vector_reduce_mul:
@@ -1143,7 +1157,8 @@
     case Intrinsic::experimental_vector_reduce_fmin:
     case Intrinsic::experimental_vector_reduce_umax:
    case Intrinsic::experimental_vector_reduce_umin:
-      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
+      return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF, 1,
+                                   CostKind, I);
     case Intrinsic::fshl:
     case Intrinsic::fshr: {
       Value *X = Args[0];
@@ -1159,25 +1174,30 @@
       // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
       // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy,
+                                                  CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy,
+                                                  CostKind);
       Cost +=
           ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
+                                              CostKind,
                                               OpKindX, OpKindZ, OpPropsX);
       Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
+                                                  CostKind,
                                                   OpKindY, OpKindZ, OpPropsY);
       // Non-constant shift amounts requires a modulo.
       if (OpKindZ != TTI::OK_UniformConstantValue &&
           OpKindZ != TTI::OK_NonUniformConstantValue)
         Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
+                                                    CostKind,
                                                     OpKindZ, OpKindBW, OpPropsZ,
                                                     OpPropsBW);
       // For non-rotates (X != Y) we must add shift-by-zero handling costs.
       if (X != Y) {
         Type *CondTy = RetTy->getWithNewBitWidth(1);
         Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
-                                                CondTy, nullptr);
+                                                CondTy, CostKind);
         Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                CondTy, nullptr);
+                                                CondTy, CostKind);
       }
       return Cost;
     }
@@ -1191,6 +1211,7 @@
   unsigned getIntrinsicInstrCost(
       Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF,
       unsigned ScalarizationCostPassed = std::numeric_limits::max(),
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
       const Instruction *I = nullptr) {
     auto *ConcreteTTI = static_cast(this);
     auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast(Tys[0]);
@@ -1226,7 +1247,8 @@
         return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
       unsigned ScalarCost =
-          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
+          ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF,
+                                             CostKind);
 
       return ScalarCalls * ScalarCost + ScalarizationCost;
     }
@@ -1312,34 +1334,40 @@
       return 0;
     case Intrinsic::masked_store:
       return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
-                                                0);
+                                                0, CostKind);
     case Intrinsic::masked_load:
-      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
+      return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0,
+                                                CostKind);
     case Intrinsic::experimental_vector_reduce_add:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_mul:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_and:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::And, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_or:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_xor:
       return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
-                                                     /*IsPairwiseForm=*/false);
+                                                     /*IsPairwiseForm=*/false,
+                                                     CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fadd:
+      // FIXME: Add new flag for cost of strict reductions.
       return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FAdd, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_v2_fmul:
+      // FIXME: Add new flag for cost of strict reductions.
      return ConcreteTTI->getArithmeticReductionCost(
           Instruction::FMul, VecOpTy,
-          /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
-                                     // reductions.
+          /*IsPairwiseForm=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_smax:
     case Intrinsic::experimental_vector_reduce_smin:
     case Intrinsic::experimental_vector_reduce_fmax:
@@ -1347,13 +1375,13 @@
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/false);
+          /*IsUnsigned=*/false, CostKind);
     case Intrinsic::experimental_vector_reduce_umax:
     case Intrinsic::experimental_vector_reduce_umin:
       return ConcreteTTI->getMinMaxReductionCost(
           VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)),
           /*IsPairwiseForm=*/false,
-          /*IsUnsigned=*/true);
+          /*IsUnsigned=*/true, CostKind);
     case Intrinsic::sadd_sat:
     case Intrinsic::ssub_sat: {
       Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -1367,11 +1395,12 @@
       // SatMin -> Overflow && SumDiff >= 0
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                                  CondTy, nullptr);
+                                                  CondTy, CostKind);
       return Cost;
     }
     case Intrinsic::uadd_sat:
@@ -1385,9 +1414,10 @@
 
       unsigned Cost = 0;
       Cost += ConcreteTTI->getIntrinsicInstrCost(
-          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
+          OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed,
+          CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
-                                              CondTy, nullptr);
+                                              CondTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_fix:
@@ -1399,17 +1429,22 @@
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
 
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy,
+                                                  CostKind);
       return Cost;
     }
     case Intrinsic::sadd_with_overflow:
@@ -1429,13 +1464,14 @@
       //   Sub:
       //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                                  OverflowTy, nullptr);
+                                                  OverflowTy, CostKind);
       Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
-                      BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
+                      BinaryOperator::ICmp, OverflowTy, OverflowTy, CostKind);
       Cost +=
-          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
+          ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
+                                              CostKind);
       return Cost;
     }
     case Intrinsic::uadd_with_overflow:
@@ -1447,9 +1483,9 @@
                             : BinaryOperator::Sub;
 
       unsigned Cost = 0;
-      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy, CostKind);
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::smul_with_overflow:
@@ -1463,21 +1499,24 @@
           IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
 
       unsigned Cost = 0;
-      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
-      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
+      Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy, CostKind);
+      Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy,
+                                                  CostKind);
       Cost +=
-          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
+          2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
+                                            CostKind);
       Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
+                                                  CostKind,
                                                   TTI::OK_AnyValue,
                                                   TTI::OK_UniformConstantValue);
 
       if (IID == Intrinsic::smul_with_overflow)
         Cost += ConcreteTTI->getArithmeticInstrCost(
-            Instruction::AShr, MulTy, TTI::OK_AnyValue,
+            Instruction::AShr, MulTy, CostKind, TTI::OK_AnyValue,
             TTI::OK_UniformConstantValue);
 
       Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
-                                              OverflowTy, nullptr);
+                                              OverflowTy, CostKind);
       return Cost;
     }
     case Intrinsic::ctpop:
@@ -1534,14 +1573,17 @@
     // If we can't lower fmuladd into an FMA estimate the cost as a floating
     // point mul followed by an add.
     if (IID == Intrinsic::fmuladd)
-      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
-             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
+      return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
+                                                 CostKind) +
+             ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
+                                                 CostKind);
     if (IID == Intrinsic::experimental_constrained_fmuladd)
       return ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fmul, RetTy, Tys,
-                 nullptr) +
+                 Intrinsic::experimental_constrained_fmul, RetTy, Tys, nullptr,
+                 CostKind) +
              ConcreteTTI->getIntrinsicCost(
-                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr);
+                 Intrinsic::experimental_constrained_fadd, RetTy, Tys, nullptr,
+                 CostKind);
 
     // Else, assume that we need to scalarize this intrinsic. For math builtins
     // this will emit a costly libcall, adding call overhead and spills. Make it
@@ -1560,7 +1602,7 @@
       ScalarTys.push_back(Ty);
     }
     unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
-        IID, RetTy->getScalarType(), ScalarTys, FMF);
+        IID, RetTy->getScalarType(), ScalarTys, FMF, CostKind);
     for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
       if (Tys[i]->isVectorTy()) {
         if (ScalarizationCostPassed == std::numeric_limits::max())
@@ -1588,7 +1630,8 @@
   /// \param RetTy Return value types.
   /// \param Tys Argument types.
   /// \returns The cost of Call instruction.
-  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys) {
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys,
+                            TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
     return 10;
   }
@@ -1638,7 +1681,8 @@
   /// The cost model should take into account that the actual length of the
   /// vector is reduced on each iteration.
   unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
-                                      bool IsPairwise) {
+                                      bool IsPairwise,
+                                      TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
     unsigned NumReduxLevels = Log2_32(NumVecElts);
@@ -1657,7 +1701,7 @@
       ShuffleCost += (IsPairwise + 1) *
                      ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                                  NumVecElts, SubTy);
-      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
+      ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy, CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
@@ -1687,7 +1731,8 @@
   /// Try to calculate op costs for min/max reduction operations.
   /// \param CondTy Conditional type for the Select instruction.
   unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
-                                  bool IsPairwise, bool) {
+                                  bool IsPairwise, bool IsUnsigned,
+                                  TTI::TargetCostKind CostKind) {
     Type *ScalarTy = Ty->getElementType();
     Type *ScalarCondTy = CondTy->getElementType();
     unsigned NumVecElts = Ty->getNumElements();
@@ -1718,9 +1763,9 @@
           ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
                                       NumVecElts, SubTy);
       MinMaxCost +=
-          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
+          ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
           ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
-                                          nullptr);
+                                          CostKind);
       Ty = SubTy;
       ++LongVectorCount;
     }
@@ -1743,9 +1788,9 @@
                                     0, Ty);
     MinMaxCost +=
         NumReduxLevels *
-        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
+        (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
          ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
-                                         nullptr));
+                                         CostKind));
     // The last min/max should be in vector registers and we counted it above.
     // So just need a single extractelement.
     return ShuffleCost + MinMaxCost +
diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,6 +2157,9 @@
     return false; // Assume to be zero-cost.
   }
 
+  TargetTransformInfo::TargetCostKind CostKind =
+      TargetTransformInfo::TCK_RecipThroughput;
+
   if (auto *CastExpr = dyn_cast(S)) {
     unsigned Opcode;
     switch (S->getSCEVType()) {
@@ -2174,7 +2177,7 @@
     }
     const SCEV *Op = CastExpr->getOperand();
     BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
-                                            /*Src=*/Op->getType());
+                                            /*Src=*/Op->getType(), CostKind);
     Worklist.emplace_back(Op);
     return false; // Will answer upon next entry into this function.
   }
@@ -2184,7 +2187,8 @@
     if (auto *SC = dyn_cast(UDivExpr->getRHS())) {
       if (SC->getAPInt().isPowerOf2()) {
         BudgetRemaining -=
-            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType());
+            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
+                                       CostKind);
         // Note that we don't count the cost of RHS, because it is a constant,
         // and we consider those to be free. But if that changes, we would need
         // to log2() it first before calling isHighCostExpansionHelper().
@@ -2207,7 +2211,8 @@
 
     // Need to count the cost of this UDiv.
     BudgetRemaining -=
-        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType());
+        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
+                                   CostKind);
     Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
     return false; // Will answer upon next entry into this function.
   }
@@ -2218,8 +2223,10 @@
   assert(NAry->getNumOperands() >= 2 &&
          "Polynomial should be at least linear");
 
-  int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
-  int MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+  int AddCost =
+      TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
+  int MulCost =
+      TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
 
   // In this polynominal, we may have some zero operands, and we shouldn't
   // really charge for those. So how many non-zero coeffients are there?
@@ -2273,22 +2280,26 @@
     int PairCost;
     switch (S->getSCEVType()) {
     case scAddExpr:
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Add, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
       break;
     case scMulExpr:
       // TODO: this is a very pessimistic cost modelling for Mul,
       // because of Bin Pow algorithm actually used by the expander,
       // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
-      PairCost = TTI.getArithmeticInstrCost(Instruction::Mul, OpType);
+      PairCost =
+          TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
       break;
     case scSMaxExpr:
     case scUMaxExpr:
    case scSMinExpr:
     case scUMinExpr:
       PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
-                                        CmpInst::makeCmpResultType(OpType)) +
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind) +
                  TTI.getCmpSelInstrCost(Instruction::Select, OpType,
-                                        CmpInst::makeCmpResultType(OpType));
+                                        CmpInst::makeCmpResultType(OpType),
+                                        CostKind);
       break;
     default:
       llvm_unreachable("There are no other variants here.");
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -154,8 +154,9 @@
 }
 
 int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
-                                    ArrayRef Operands) const {
-  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
+                                    ArrayRef Operands,
+                                    TTI::TargetCostKind CostKind) const {
+  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
 }
 
 int TargetTransformInfo::getExtCost(const Instruction *I,
@@ -165,8 +166,9 @@
 
 int TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                                           ArrayRef Arguments,
-                                          const User *U) const {
-  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
+                                          const User *U,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -440,22 +442,27 @@
   return Cost;
 }
 
-int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
+int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCost(Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
-int TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
-                                           const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                        const APInt &Imm, Type *Ty,
+                                       TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
-int TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
-                                             const APInt &Imm, Type *Ty) const {
-  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty);
+int
+TargetTransformInfo::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                          const APInt &Imm, Type *Ty,
+                                         TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -582,12 +589,14 @@
 }
 
 int TargetTransformInfo::getArithmeticInstrCost(
-    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
+    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
+    OperandValueKind Opd1Info,
     OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
     OperandValueProperties Opd2PropInfo, ArrayRef Args,
     const Instruction *CxtI) const {
   int Cost = TTIImpl->getArithmeticInstrCost(
-      Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI);
+      Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo,
+      Args, CxtI);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -600,10 +609,11 @@
 }
 
 int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                                          TTI::TargetCostKind CostKind,
                                           const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
+  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -616,18 +626,20 @@
   return Cost;
 }
 
-int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
-  int Cost = TTIImpl->getCFInstrCost(Opcode);
+int TargetTransformInfo::getCFInstrCost(unsigned Opcode,
+                                        TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCFInstrCost(Opcode, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                             Type *CondTy,
+                                            TTI::TargetCostKind CostKind,
                                             const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -642,40 +654,45 @@
 
 int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                          MaybeAlign Alignment,
                                          unsigned AddressSpace,
+                                         TTI::TargetCostKind CostKind,
                                          const Instruction *I) const {
   assert((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
-  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                      CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                                unsigned Alignment,
-                                               unsigned AddressSpace) const {
+                                               unsigned AddressSpace,
+                                               TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+                                     CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
-int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
-                                                Value *Ptr, bool VariableMask,
-                                                unsigned Alignment,
-                                                const Instruction *I) const {
+int TargetTransformInfo::getGatherScatterOpCost(
+    unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask,
+    unsigned Alignment, TTI::TargetCostKind CostKind,
+    const Instruction *I) const {
   int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
-                                             Alignment, I);
+                                             Alignment, CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getInterleavedMemoryOpCost(
     unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices,
-    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
-    bool UseMaskForGaps) const {
+    unsigned Alignment, unsigned AddressSpace,
+    TTI::TargetCostKind CostKind,
+    bool UseMaskForCond, bool UseMaskForGaps) const {
   int Cost = TTIImpl->getInterleavedMemoryOpCost(
-      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, UseMaskForCond,
-      UseMaskForGaps);
+      Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, CostKind,
+      UseMaskForCond, UseMaskForGaps);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -684,9 +701,11 @@
                                                ArrayRef Tys,
                                                FastMathFlags FMF,
                                                unsigned ScalarizationCostPassed,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
   int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
-                                            ScalarizationCostPassed, I);
+                                            ScalarizationCostPassed, CostKind,
+                                            I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -694,15 +713,18 @@
 int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                                ArrayRef Args,
                                                FastMathFlags FMF, unsigned VF,
+                                               TTI::TargetCostKind CostKind,
                                                const Instruction *I) const {
-  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I);
+  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF,
+                                            CostKind, I);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
 int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
-                                          ArrayRef Tys) const {
-  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
+                                          ArrayRef Tys,
+                                          TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys, CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -727,18 +749,20 @@
 
 int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode,
                                                     VectorType *Ty,
-                                                    bool IsPairwiseForm) const {
-  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+                                                    bool IsPairwiseForm,
+                                                    TTI::TargetCostKind CostKind) const {
+  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+                                                 CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
 
-int TargetTransformInfo::getMinMaxReductionCost(VectorType *Ty,
-                                                VectorType *CondTy,
-                                                bool IsPairwiseForm,
-                                                bool IsUnsigned) const {
+int TargetTransformInfo::getMinMaxReductionCost(
+    VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+    TTI::TargetCostKind CostKind) const {
   int Cost =
-      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
+      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+                                      CostKind);
   assert(Cost >= 0 && "TTI should not produce negative costs!");
   return Cost;
 }
@@ -1151,14 +1175,16 @@
 }
 
 int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
+  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
   switch (I->getOpcode()) {
   case Instruction::GetElementPtr:
-    return getUserCost(I, TCK_RecipThroughput);
+    return getUserCost(I, CostKind);
 
   case Instruction::Ret:
   case Instruction::PHI:
   case Instruction::Br: {
-    return getCFInstrCost(I->getOpcode());
+    return getCFInstrCost(I->getOpcode(), CostKind);
   }
   case Instruction::Add:
   case Instruction::FAdd:
@@ -1183,7 +1209,8 @@
     Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
     Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
     SmallVector Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
  }
   case Instruction::FNeg: {
@@ -1193,31 +1220,34 @@
     Op2VK = OK_AnyValue;
     Op2VP = OP_None;
     SmallVector Operands(I->operand_values());
-    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
+    return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind,
+                                  Op1VK, Op2VK,
                                   Op1VP, Op2VP, Operands, I);
   }
   case Instruction::Select: {
     const SelectInst *SI = cast(I);
     Type *CondTy = SI->getCondition()->getType();
-    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
+    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy,
+                              CostKind, I);
   }
   case Instruction::ICmp:
   case Instruction::FCmp: {
     Type *ValTy = I->getOperand(0)->getType();
-    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
+    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(),
+                              CostKind, I);
   }
   case Instruction::Store: {
     const StoreInst *SI = cast(I);
     Type *ValTy = SI->getValueOperand()->getType();
     return getMemoryOpCost(I->getOpcode(), ValTy,
                            MaybeAlign(SI->getAlignment()),
-                           SI->getPointerAddressSpace(), I);
+                           SI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::Load: {
     const LoadInst *LI = cast(I);
     return getMemoryOpCost(I->getOpcode(), I->getType(),
                            MaybeAlign(LI->getAlignment()),
-                           LI->getPointerAddressSpace(), I);
+                           LI->getPointerAddressSpace(), CostKind, I);
   }
   case Instruction::ZExt:
   case Instruction::SExt:
@@ -1233,7 +1263,7 @@
   case Instruction::BitCast:
   case Instruction::AddrSpaceCast: {
     Type *SrcTy = I->getOperand(0)->getType();
-    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
+    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, CostKind, I);
   }
   case Instruction::ExtractElement: {
     const ExtractElementInst *EEI = cast(I);
@@ -1250,7 +1280,8 @@
     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/false);
+                                        /*IsPairwiseForm=*/false,
+                                        CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)),
@@ -1266,7 +1297,7 @@
     switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
     case RK_Arithmetic:
       return getArithmeticReductionCost(ReduxOpCode, ReduxType,
-                                        /*IsPairwiseForm=*/true);
+                                        /*IsPairwiseForm=*/true, CostKind);
     case RK_MinMax:
       return getMinMaxReductionCost(
           ReduxType, cast(CmpInst::makeCmpResultType(ReduxType)),
@@ -1334,7 +1365,7 @@
       FMF = FPMO->getFastMathFlags();
 
     return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args,
-                                 FMF, 1, II);
+                                 FMF, 1, CostKind, II);
   }
   return -1;
   default:
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -6652,6 +6652,8 @@
   uint64_t ScalarCost =
       TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
   uint64_t VectorCost = StoreExtractCombineCost;
+  enum TargetTransformInfo::TargetCostKind CostKind =
+      TargetTransformInfo::TCK_RecipThroughput;
   for (const auto &Inst : InstsToBePromoted) {
     // Compute the cost.
     // By construction, all instructions being promoted are arithmetic ones.
@@ -6667,8 +6669,9 @@
         !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
                         : TargetTransformInfo::OK_AnyValue;
     ScalarCost += TTI.getArithmeticInstrCost(
-        Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
+        Inst->getOpcode(), Inst->getType(), CostKind, Arg0OVK, Arg1OVK);
     VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
+                                             CostKind,
                                              Arg0OVK, Arg1OVK);
   }
   LLVM_DEBUG(
@@ -7127,7 +7130,8 @@
     return false;
   ConstantInt *GEPIIdx = cast(GEPI->getOperand(1));
   // Check that GEPI is a cheap one.
-  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
+  if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
+                         TargetTransformInfo::TCK_SizeAndLatency)
       > TargetTransformInfo::TCC_Basic)
     return false;
   Value *GEPIOp = GEPI->getOperand(0);
@@ -7176,7 +7180,8 @@
         cast(UGEPI->getOperand(1))->getType())
       return false;
     ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1));
-    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
+    if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency)
        > TargetTransformInfo::TCC_Basic)
       return false;
     UGEPIs.push_back(UGEPI);
@@ -7187,7 +7192,9 @@
   for (GetElementPtrInst *UGEPI : UGEPIs) {
     ConstantInt *UGEPIIdx = cast(UGEPI->getOperand(1));
     APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
-    unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
+    unsigned ImmCost =
+        TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
+                           TargetTransformInfo::TCK_SizeAndLatency);
     if (ImmCost > TargetTransformInfo::TCC_Basic)
       return false;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -72,11 +72,11 @@
   using BaseT::getIntImmCost;
   int getIntImmCost(int64_t Val);
-  int getIntImmCost(const APInt &Imm, Type *Ty);
+  int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm,
-                        Type *Ty);
+                        Type *Ty, TTI::TargetCostKind CostKind);
   int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-                          Type *Ty);
+                          Type *Ty, TTI::TargetCostKind CostKind);
   TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
 
   /// @}
@@ -112,6 +112,7 @@
   unsigned getMaxInterleaveFactor(unsigned VF);
 
   int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+                       TTI::TargetCostKind CostKind,
                        const Instruction *I = nullptr);
 
   int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
@@ -121,6 +122,7 @@
 
   int getArithmeticInstrCost(
       unsigned Opcode, Type *Ty,
+      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
       TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
       TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
@@ -131,13 +133,16 @@
   int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                 const SCEV *Ptr);
 
   int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+                         TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr);
 
   TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                     bool IsZeroCmp) const;
 
   int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
-                      unsigned AddressSpace, const Instruction *I = nullptr);
+                      unsigned AddressSpace,
+                      TTI::TargetCostKind CostKind,
+                      const Instruction *I = nullptr);
 
   int
getCostOfKeepingLiveOverCall(ArrayRef Tys); @@ -192,6 +197,7 @@ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); @@ -225,7 +231,8 @@ TTI::ReductionFlags Flags) const; int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -57,7 +57,8 @@ } /// Calculate the cost of materializing the given constant. -int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -82,7 +83,8 @@ } int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -139,16 +141,17 @@ if (Idx == ImmIdx) { int NumConstants = (BitSize + 63) / 64; - int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? static_cast(TTI::TCC_Free) : Cost; } - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -161,7 +164,7 @@ // selected instruction, so we compute the materialization cost for the // immediate directly. if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv) - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); switch (IID) { default: @@ -174,7 +177,7 @@ case Intrinsic::umul_with_overflow: if (Idx == 1) { int NumConstants = (BitSize + 63) / 64; - int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty); + int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? 
static_cast(TTI::TCC_Free) : Cost; @@ -190,7 +193,7 @@ return TTI::TCC_Free; break; } - return AArch64TTIImpl::getIntImmCost(Imm, Ty); + return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); } TargetTransformInfo::PopcntSupportKind @@ -267,6 +270,7 @@ } int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -295,7 +299,7 @@ EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); static const TypeConversionCostTblEntry ConversionTbl[] = { @@ -399,7 +403,7 @@ SrcTy.getSimpleVT())) return Entry->Cost; - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); } int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, @@ -425,17 +429,18 @@ auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy); auto DstVT = TLI->getValueType(DL, Dst); auto SrcVT = TLI->getValueType(DL, Src); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // If the resulting type is still a vector and the destination type is legal, // we may get the extension for free. If not, get the default cost for the // extend. if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT)) - return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); // The destination type should be larger than the element type. If not, get // the default cost for the extend. if (DstVT.getSizeInBits() < SrcVT.getSizeInBits()) - return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); switch (Opcode) { default: @@ -454,7 +459,7 @@ } // If we are unable to perform the extend for free, get the default cost. - return Cost + getCastInstrCost(Opcode, Dst, Src); + return Cost + getCastInstrCost(Opcode, Dst, Src, CostKind); } int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, @@ -483,7 +488,8 @@ } int AArch64TTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -504,7 +510,8 @@ switch (ISD) { default: - return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::SDIV: if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue && @@ -513,16 +520,20 @@ // normally expanded to the sequence ADD + CMP + SELECT + SRA. // The OperandValue properties many not be same as that of previous // operation; conservatively assume OP_None. 
- Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info, + Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return Cost; @@ -535,31 +546,34 @@ // Vector signed division by constant are expanded to the // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division // to MULHS + SUB + SRL + ADD + SRL. - int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info, - Opd2Info, + int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, - Opd2Info, + int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, - Opd2Info, + int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, + Opd1Info, Opd2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; } } - Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); if (Ty->isVectorTy()) { // On AArch64, vector divisions are not supported natively and are // expanded into scalar divisions of each pair of elements. - Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info, - Opd2Info, Opd1PropInfo, Opd2PropInfo); - Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info, - Opd2Info, Opd1PropInfo, Opd2PropInfo); + Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, + Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo); + Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, + Opd1Info, Opd2Info, Opd1PropInfo, + Opd2PropInfo); // TODO: if one of the arguments is scalar, then it's not necessary to // double the cost of handling the vector elements. 
Cost += Cost; @@ -596,7 +610,9 @@ } int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, + TTI::TargetCostKind CostKind, + const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // We don't lower some vector selects well that are wider than the register @@ -623,7 +639,7 @@ return Entry->Cost; } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } AArch64TTIImpl::TTI::MemCmpExpansionOptions @@ -646,6 +662,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { auto LT = TLI->getTypeLegalizationCost(DL, Ty); @@ -688,6 +705,7 @@ ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { assert(Factor >= 2 && "Invalid interleave factor"); @@ -707,18 +725,19 @@ } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef Tys) { int Cost = 0; + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; for (auto *I : Tys) { if (!I->isVectorTy()) continue; if (I->getScalarSizeInBits() * cast(I)->getNumElements() == 128) - Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) + - getMemoryOpCost(Instruction::Load, I, Align(128), 0); + Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) + + getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind); } return Cost; } @@ -932,10 +951,12 @@ int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm) { + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { if (IsPairwiseForm) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm); + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); MVT MTy = LT.second; @@ -956,7 +977,8 @@ if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy)) return LT.first * Entry->Cost; - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm); + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); } int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -184,6 +184,7 @@ int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -191,7 +192,7 @@ ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); bool isInlineAsmSourceOfDivergence(const CallInst *CI, ArrayRef Indices = {}) const; @@ -225,23 +226,30 @@ int getInlinerVectorBonusPercent() { return 0; } - int getArithmeticReductionCost(unsigned Opcode, - VectorType *Ty, - bool IsPairwise); + int getArithmeticReductionCost( + 
unsigned Opcode, + VectorType *Ty, + bool IsPairwise, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + template int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, const Instruction *I = nullptr); - int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, - bool IsUnsigned); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, + unsigned VF = 1, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getMinMaxReductionCost( + VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + unsigned getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind); }; @@ -282,7 +290,7 @@ unsigned Alignment, unsigned AddrSpace) const; unsigned getMaxInterleaveFactor(unsigned VF); - unsigned getCFInstrCost(unsigned Opcode); + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind); int getVectorInstrCost(unsigned Opcode, Type *ValTy, unsigned Index); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -422,6 +422,7 @@ } int GCNTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -430,7 +431,8 @@ const Instruction *CxtI) { EVT OrigTy = TLI->getValueType(DL, Ty); if (!OrigTy.isSimple()) { - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } @@ -542,7 +544,8 @@ break; } - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } @@ -562,13 +565,15 @@ template int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (!intrinsicHasPackedVectorBenefit(ID)) - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); EVT OrigTy = TLI->getValueType(DL, RetTy); if (!OrigTy.isSimple()) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } // Legalize the type. 
@@ -597,31 +602,36 @@ int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { - return getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) { + return getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } int GCNTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { return getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } -unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode) { +unsigned GCNTTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) { // XXX - For some reason this isn't called for switch. switch (Opcode) { case Instruction::Br: case Instruction::Ret: return 10; default: - return BaseT::getCFInstrCost(Opcode); + return BaseT::getCFInstrCost(Opcode, CostKind); } } int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwise) { + bool IsPairwise, + TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support @@ -629,15 +639,15 @@ if (IsPairwise || !ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, Ty, IsPairwise, CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getFullRateInstrCost(); } int GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwise, - bool IsUnsigned) { + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support @@ -645,7 +655,8 @@ if (IsPairwise || !ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(Ty, CondTy, IsPairwise, IsUnsigned, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); return LT.first * getHalfRateInstrCost(); @@ -979,7 +990,7 @@ if (auto *FPMO = dyn_cast(II)) FMF = FPMO->getFastMathFlags(); return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), Args, - FMF, 1, II); + FMF, 1, CostKind, II); } else { return BaseT::getUserCost(U, Operands, CostKind); } @@ -1031,7 +1042,7 @@ case Instruction::BitCast: case Instruction::AddrSpaceCast: { return getCastInstrCost(I->getOpcode(), I->getType(), - I->getOperand(0)->getType(), I); + I->getOperand(0)->getType(), CostKind, I); } case Instruction::Add: case Instruction::FAdd: @@ -1052,7 +1063,7 @@ case Instruction::Or: case Instruction::Xor: case Instruction::FNeg: { - return getArithmeticInstrCost(I->getOpcode(), I->getType(), + return getArithmeticInstrCost(I->getOpcode(), I->getType(), CostKind, TTI::OK_AnyValue, TTI::OK_AnyValue, TTI::OP_None, TTI::OP_None, Operands, I); } @@ -1127,14 +1138,15 @@ return 8; } -unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode) { +unsigned R600TTIImpl::getCFInstrCost(unsigned Opcode, + TTI::TargetCostKind CostKind) { // XXX - For some reason this isn't called for switch. 
switch (Opcode) { case Instruction::Br: case Instruction::Ret: return 10; default: - return BaseT::getCFInstrCost(Opcode); + return BaseT::getCFInstrCost(Opcode, CostKind); } } diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -110,9 +110,10 @@ Type *Ty); using BaseT::getIntImmCost; - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind); /// @} @@ -194,9 +195,11 @@ } int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); @@ -206,6 +209,7 @@ int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Op1Info = TTI::OK_AnyValue, TTI::OperandValueKind Op2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -214,17 +218,21 @@ const Instruction *CxtI = nullptr); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, - bool VariableMask, unsigned Alignment, - const Instruction *I = nullptr); + unsigned getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); bool isLoweredToCall(const Function *F); bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -80,7 +80,8 @@ return false; } -int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned Bits = Ty->getPrimitiveSizeInBits(); @@ -123,7 +124,7 @@ } int ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { // Division by a constant can be turned into multiplication, but only if we // know it's constant. So it's not so much that the immediate is cheap (it's // not), but that the alternative is worse. @@ -138,12 +139,14 @@ if (Imm == 255 || Imm == 65535) return 0; // Conversion to BIC is free, and means we can use ~Imm instead. 
- return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty)); + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(~Imm, Ty, CostKind)); } if (Opcode == Instruction::Add) // Conversion to SUB is free, and means we can use -Imm instead. - return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty)); + return std::min(getIntImmCost(Imm, Ty, CostKind), + getIntImmCost(-Imm, Ty, CostKind)); if (Opcode == Instruction::ICmp && Imm.isNegative() && Ty->getIntegerBitWidth() == 32) { @@ -160,10 +163,11 @@ if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) return 0; - return getIntImmCost(Imm, Ty); + return getIntImmCost(Imm, Ty, CostKind); } int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -187,7 +191,7 @@ EVT DstTy = TLI->getValueType(DL, Dst); if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); // The extend of a load is free if (I && isa(I->getOperand(0))) { @@ -418,7 +422,7 @@ int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); } int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, @@ -458,6 +462,7 @@ } int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a vector select gets lowered to vbsl. @@ -485,7 +490,8 @@ int BaseCost = ST->hasMVEIntegerOps() && ValTy->isVectorTy() ? ST->getMVEVectorCostFactor() : 1; - return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseCost * BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, + I); } int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, @@ -681,6 +687,7 @@ } int ARMTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -737,7 +744,8 @@ if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) return LT.first * Entry->Cost; - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info, Opd1PropInfo, Opd2PropInfo); // This is somewhat of a hack. The problem that we are facing is that SROA @@ -795,7 +803,8 @@ // Else this is expand, assume that we need to scalarize this op. if (auto *VTy = dyn_cast(Ty)) { unsigned Num = VTy->getNumElements(); - unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType()); + unsigned Cost = getArithmeticInstrCost(Opcode, Ty->getScalarType(), + CostKind); // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. 
return BaseT::getScalarizationOverhead(Ty, Args) + Num * Cost; @@ -806,6 +815,7 @@ int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { std::pair LT = TLI->getTypeLegalizationCost(DL, Src); @@ -824,8 +834,9 @@ int ARMTTIImpl::getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, - bool UseMaskForGaps) { + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, + bool UseMaskForCond, bool UseMaskForGaps) { assert(Factor >= 2 && "Invalid interleave factor"); assert(isa(VecTy) && "Expect a vector type"); @@ -856,18 +867,19 @@ } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } unsigned ARMTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const Instruction *I) { using namespace PatternMatch; if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters) return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!"); VectorType *VTy = cast(DataTy); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h @@ -105,34 +105,44 @@ bool Insert, bool Extract); unsigned getOperandsScalarizationOverhead(ArrayRef Args, unsigned VF); - unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys); + unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef Tys, + TTI::TargetCostKind CostKind); unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I); - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I); + unsigned getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, + unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *SE, const SCEV *S); unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const Instruction *I); unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, - unsigned AddressSpace, bool UseMaskForCond = false, - bool UseMaskForGaps = false); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = 
TTI::TCK_SizeAndLatency, + bool UseMaskForCond = false, bool UseMaskForGaps = false); unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, - const Instruction *I); + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -140,10 +150,11 @@ ArrayRef Args = ArrayRef(), const Instruction *CxtI = nullptr); unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); - unsigned getCFInstrCost(unsigned Opcode) { + unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { return 1; } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp --- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp @@ -127,28 +127,30 @@ } unsigned HexagonTTIImpl::getCallInstrCost(Function *F, Type *RetTy, - ArrayRef Tys) { - return BaseT::getCallInstrCost(F, RetTy, Tys); + ArrayRef Tys, TTI::TargetCostKind CostKind) { + return BaseT::getCallInstrCost(F, RetTy, Tys, CostKind); } unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } unsigned HexagonTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { if (ID == Intrinsic::bswap) { std::pair LT = TLI.getTypeLegalizationCost(DL, RetTy); return LT.first + 2; } return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } unsigned HexagonTTIImpl::getAddressComputationCost(Type *Tp, @@ -159,10 +161,12 @@ unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(Opcode == Instruction::Load || Opcode == Instruction::Store); if (Opcode == Instruction::Store) - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); if (Src->isVectorTy()) { VectorType *VecTy = cast(Src); @@ -200,12 +204,15 @@ return (3 - LogA) * Cost * NumLoads; } - return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I); + return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind, I); } unsigned HexagonTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, - Type *Src, unsigned Alignment, unsigned AddressSpace) { - return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + Type *Src, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind) { + return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); } unsigned HexagonTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, @@ -213,38 +220,41 @@ return 1; } -unsigned 
HexagonTTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, - Value *Ptr, bool VariableMask, - unsigned Alignment, - const Instruction *I) { +unsigned HexagonTTIImpl::getGatherScatterOpCost( + unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I) { return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, - Alignment, I); + Alignment, CostKind, I); } unsigned HexagonTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, - unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond, + unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (Indices.size() != Factor || UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace, + CostKind, UseMaskForCond, UseMaskForGaps); return getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, - nullptr); + CostKind); } unsigned HexagonTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, TTI::TargetCostKind CostKind, const Instruction *I) { if (ValTy->isVectorTy()) { std::pair LT = TLI.getTypeLegalizationCost(DL, ValTy); if (Opcode == Instruction::FCmp) return LT.first + FloatFactor * getTypeNumElements(ValTy); } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } unsigned HexagonTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -253,12 +263,12 @@ if (LT.second.isFloatingPoint()) return LT.first + FloatFactor * getTypeNumElements(Ty); } - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } unsigned HexagonTTIImpl::getCastInstrCost(unsigned Opcode, Type *DstTy, - Type *SrcTy, const Instruction *I) { + Type *SrcTy, TTI::TargetCostKind CostKind, const Instruction *I) { if (SrcTy->isFPOrFPVectorTy() || DstTy->isFPOrFPVectorTy()) { unsigned SrcN = SrcTy->isFPOrFPVectorTy() ? getTypeNumElements(SrcTy) : 0; unsigned DstN = DstTy->isFPOrFPVectorTy() ? 
getTypeNumElements(DstTy) : 0; diff --git a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h --- a/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h +++ b/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h @@ -49,7 +49,7 @@ return TTI::PSK_Software; } - int getIntImmCost(const APInt &Imm, Type *Ty) { + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); if (Imm == 0) return TTI::TCC_Free; @@ -66,17 +66,19 @@ return 4 * TTI::TCC_Basic; } - int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) { - return getIntImmCost(Imm, Ty); + int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { + return getIntImmCost(Imm, Ty, CostKind); } int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty) { - return getIntImmCost(Imm, Ty); + Type *Ty, TTI::TargetCostKind CostKind) { + return getIntImmCost(Imm, Ty, CostKind); } unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -87,7 +89,8 @@ switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::MUL: case ISD::SDIV: @@ -98,7 +101,8 @@ // instruction cost was arbitrarily chosen to reduce the desirability // of emitting arithmetic instructions that are emulated in software. // TODO: Investigate the performance impact given specialized lowerings. - return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return 64 * BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h @@ -87,6 +87,7 @@ int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp --- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp @@ -112,7 +112,8 @@ } int NVPTXTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -123,7 +124,8 @@ switch (ISD) { default: - return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); case ISD::ADD: case ISD::MUL: @@ -136,7 +138,8 @@ if (LT.second.SimpleTy == MVT::i64) return 2 * LT.first; // Delegate other cases to the basic TTI. 
- return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, + Opd2Info, Opd1PropInfo, Opd2PropInfo); } } diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h @@ -45,12 +45,13 @@ /// @{ using BaseT::getIntImmCost; - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); unsigned getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind CostKind); @@ -91,6 +92,7 @@ int vectorCostAdjustment(int Cost, unsigned Opcode, Type *Ty1, Type *Ty2); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -99,25 +101,34 @@ const Instruction *CxtI = nullptr); int getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); - unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, + unsigned getIntrinsicInstrCost( + Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I = nullptr); + unsigned VF, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, const Instruction *I = nullptr); /// @} diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp @@ -59,9 +59,10 @@ return TTI::PSK_Software; } -int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int PPCTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCost(Imm, Ty); + return BaseT::getIntImmCost(Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -89,9 +90,10 @@ } int PPCTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind 
CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty); + return BaseT::getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -119,13 +121,14 @@ return TTI::TCC_Free; break; } - return PPCTTIImpl::getIntImmCost(Imm, Ty); + return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } int PPCTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { if (DisablePPCConstHoist) - return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty); + return BaseT::getIntImmCostInst(Opcode, Idx, Imm, Ty, CostKind); assert(Ty->isIntegerTy()); @@ -203,7 +206,7 @@ return TTI::TCC_Free; } - return PPCTTIImpl::getIntImmCost(Imm, Ty); + return PPCTTIImpl::getIntImmCost(Imm, Ty, CostKind); } unsigned @@ -720,6 +723,7 @@ } int PPCTTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -729,7 +733,8 @@ assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); // Fallback to the default implementation. - int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, + Op2Info, Opd1PropInfo, Opd2PropInfo); return vectorCostAdjustment(Cost, Opcode, Ty, nullptr); } @@ -749,16 +754,18 @@ } int PPCTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); - int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src); + int Cost = BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); return vectorCostAdjustment(Cost, Opcode, Dst, Src); } int PPCTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) { - int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + int Cost = BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); return vectorCostAdjustment(Cost, Opcode, ValTy, nullptr); } @@ -837,13 +844,15 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { // Legalize the type. std::pair LT = TLI->getTypeLegalizationCost(DL, Src); assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && "Invalid Opcode"); - int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, + CostKind); Cost = vectorCostAdjustment(Cost, Opcode, Src, nullptr); bool IsAltivecType = ST->hasAltivec() && @@ -913,11 +922,12 @@ ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(isa(VecTy) && @@ -928,7 +938,8 @@ // Firstly, the cost of load/store operation. 
int Cost = - getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace); + getMemoryOpCost(Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, + CostKind); // PPC, for both Altivec/VSX and QPX, support cheap arbitrary permutations // (at least in the sense that there need only be one non-loop-invariant @@ -943,19 +954,21 @@ unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } unsigned PPCTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { if (ID == Intrinsic::bswap && ST->hasP9Vector()) return TLI->getTypeLegalizationCost(DL, RetTy).first; return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -41,12 +41,13 @@ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {} - int getIntImmCost(const APInt &Imm, Type *Ty); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); }; } // end namespace llvm -#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H \ No newline at end of file +#endif // LLVM_LIB_TARGET_RISCV_RISCVTARGETTRANSFORMINFO_H diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -15,7 +15,8 @@ #define DEBUG_TYPE "riscvtti" -int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); @@ -30,7 +31,7 @@ } int RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy() && "getIntImmCost can only estimate cost of materialising integers"); @@ -78,7 +79,7 @@ } // Otherwise, use the full materialisation cost. - return getIntImmCost(Imm, Ty); + return getIntImmCost(Imm, Ty, CostKind); } // By default, prevent hoisting. @@ -86,7 +87,8 @@ } int RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { // Prevent hoisting in unknown cases. 
return TTI::TCC_Free; } diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -38,11 +38,12 @@ unsigned getInliningThresholdMultiplier() { return 3; } - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty, TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth); @@ -75,6 +76,7 @@ int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -88,28 +90,35 @@ unsigned getBoolVecToIntConversionCost(unsigned Opcode, Type *Dst, const Instruction *I); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); bool isFoldableLoad(const LoadInst *Ld, const Instruction *&FoldedValue); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, const Instruction *I = nullptr); + unsigned VF = 1, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, + const Instruction *I = nullptr); int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, const Instruction *I = nullptr); /// @} }; diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp --- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp @@ -30,7 +30,8 @@ // //===----------------------------------------------------------------------===// -int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int SystemZTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -63,7 +64,8 @@ } int SystemZTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -177,11 +179,12 @@ break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + 
return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } int SystemZTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -226,7 +229,7 @@ return TTI::TCC_Free; break; } - return SystemZTTIImpl::getIntImmCost(Imm, Ty); + return SystemZTTIImpl::getIntImmCost(Imm, Ty, CostKind); } TargetTransformInfo::PopcntSupportKind @@ -258,7 +261,8 @@ } if (isa(&I)) { Type *MemAccessTy = I.getOperand(0)->getType(); - NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0); + NumStores += getMemoryOpCost(Instruction::Store, MemAccessTy, None, 0, + TTI::TCK_RecipThroughput); } } @@ -365,7 +369,8 @@ } int SystemZTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { @@ -494,7 +499,7 @@ // Return the cost of multiple scalar invocation plus the cost of // inserting and extracting the values. unsigned ScalarCost = - getArithmeticInstrCost(Opcode, Ty->getScalarType()); + getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); unsigned Cost = (VF * ScalarCost) + getScalarizationOverhead(Ty, Args); // FIXME: VF 2 for these FP operations are currently just as // expensive as for VF 4. @@ -521,7 +526,7 @@ } // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info, + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo, Args, CxtI); } @@ -684,6 +689,7 @@ } int SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { unsigned DstScalarBits = Dst->getScalarSizeInBits(); unsigned SrcScalarBits = Src->getScalarSizeInBits(); @@ -764,7 +770,7 @@ // inserting and extracting the values. Base implementation does not // realize float->int gets scalarized. unsigned ScalarCost = getCastInstrCost(Opcode, Dst->getScalarType(), - Src->getScalarType()); + Src->getScalarType(), CostKind); unsigned TotCost = VF * ScalarCost; bool NeedsInserts = true, NeedsExtracts = true; // FP128 registers do not get inserted or extracted. 
@@ -804,7 +810,7 @@ } } - return BaseT::getCastInstrCost(Opcode, Dst, Src, I); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); } // Scalar i8 / i16 operations will typically be made after first extending @@ -820,7 +826,9 @@ } int SystemZTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, - Type *CondTy, const Instruction *I) { + Type *CondTy, + TTI::TargetCostKind CostKind, + const Instruction *I) { if (!ValTy->isVectorTy()) { switch (Opcode) { case Instruction::ICmp: { @@ -895,7 +903,7 @@ } } - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, nullptr); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind); } int SystemZTTIImpl:: @@ -1009,6 +1017,7 @@ int SystemZTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { assert(!Src->isVoidTy() && "Invalid type"); @@ -1077,11 +1086,12 @@ ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); assert(isa(VecTy) && "Expect a vector type for interleaved memory op"); @@ -1142,21 +1152,23 @@ int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, unsigned VF, + TTI::TargetCostKind CostKind, const Instruction *I) { int Cost = getVectorIntrinsicInstrCost(ID, RetTy); if (Cost != -1) return Cost; - return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF, CostKind, I); } int SystemZTTIImpl::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { int Cost = getVectorIntrinsicInstrCost(ID, RetTy); if (Cost != -1) return Cost; return BaseT::getIntrinsicInstrCost(ID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -57,6 +57,7 @@ unsigned getRegisterBitWidth(bool Vector) const; unsigned getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -44,13 +44,14 @@ } unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( - unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, + TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef Args, const Instruction *CxtI) { unsigned Cost = BasicTTIImplBase::getArithmeticInstrCost( - Opcode, Ty, Opd1Info, 
Opd2Info, Opd1PropInfo, Opd2PropInfo); + Opcode, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); if (auto *VTy = dyn_cast(Ty)) { switch (Opcode) { @@ -64,7 +65,7 @@ Opd2Info != TTI::OK_UniformConstantValue) Cost = VTy->getNumElements() * (TargetTransformInfo::TCC_Basic + - getArithmeticInstrCost(Opcode, VTy->getElementType()) + + getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) + TargetTransformInfo::TCC_Basic); break; } diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -119,6 +119,7 @@ unsigned getMaxInterleaveFactor(unsigned VF); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, @@ -128,66 +129,82 @@ int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, VectorType *SubTp); int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I = nullptr); int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); unsigned getScalarizationOverhead(Type *Ty, const APInt &DemandedElts, bool Insert, bool Extract); int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, - unsigned AddressSpace, const Instruction *I = nullptr); + unsigned AddressSpace, + TTI::TargetCostKind CostKind, + const Instruction *I = nullptr); int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, - unsigned AddressSpace); + unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment, + TTI::TargetCostKind CostKind, const Instruction *I); int getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr); unsigned getAtomicMemIntrinsicMaxElementSize() const; - int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, - ArrayRef Tys, FastMathFlags FMF, - unsigned ScalarizationCostPassed = UINT_MAX, - const Instruction *I = nullptr); + int getIntrinsicInstrCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, + FastMathFlags FMF, unsigned ScalarizationCostPassed = UINT_MAX, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); + int getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF = 1, const Instruction *I = nullptr); + unsigned VF = 1, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, + const Instruction *I = nullptr); int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm); + bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned); int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsPairwiseForm, bool IsUnsigned); + bool IsPairwiseForm, bool IsUnsigned, + TTI::TargetCostKind CostKind); int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool 
UseMaskForGaps = false); int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, bool UseMaskForCond = false, bool UseMaskForGaps = false); int getIntImmCost(int64_t); - int getIntImmCost(const APInt &Imm, Type *Ty); + int getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); unsigned getUserCost(const User *U, ArrayRef Operands, TTI::TargetCostKind); - int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty); + int getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind); int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, - Type *Ty); + Type *Ty, TTI::TargetCostKind CostKind); bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2); bool canMacroFuseCmp(); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -170,6 +170,7 @@ } int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + TTI::TargetCostKind CostKind, TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo, @@ -256,20 +257,25 @@ // The OperandValue properties may not be the same as that of the previous // operation; conservatively assume OP_None. int Cost = - 2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info, + 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, + Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); - Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info, + Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info, + Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); if (ISD == ISD::SREM) { // For SREM: (X % C) is the equivalent of (X - (X/C)*C) - Cost += getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info); - Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Op1Info, Op2Info); + Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info, + Op2Info); + Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info, + Op2Info); } return Cost; @@ -277,12 +283,14 @@ // Vector unsigned division/remainder will be simplified to shifts/masks. 
if (ISD == ISD::UDIV) - return getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); else // UREM - return getArithmeticInstrCost(Instruction::And, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::And, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -596,7 +604,8 @@ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) // On AVX512, a packed v32i16 shift left by a constant build_vector // is lowered into a vector multiply (vpmullw). - return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -608,7 +617,8 @@ Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) // On AVX2, a packed v16i16 shift left by a constant build_vector // is lowered into a vector multiply (vpmullw). - return getArithmeticInstrCost(Instruction::Mul, Ty, Op1Info, Op2Info, + return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, + Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); @@ -916,13 +926,13 @@ if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || ISD == ISD::UREM)) { int ScalarCost = getArithmeticInstrCost( - Opcode, Ty->getScalarType(), Op1Info, Op2Info, + Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost; } // Fallback to the default implementation. - return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); + return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); } int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *BaseTp, @@ -1353,6 +1363,7 @@ } int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, + TTI::TargetCostKind CostKind, const Instruction *I) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -1966,7 +1977,7 @@ // The function getSimpleVT only handles simple value types. if (!SrcTy.isSimple() || !DstTy.isSimple()) - return BaseT::getCastInstrCost(Opcode, Dst, Src); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind); MVT SimpleSrcTy = SrcTy.getSimpleVT(); MVT SimpleDstTy = DstTy.getSimpleVT(); @@ -2027,10 +2038,11 @@ return Entry->Cost; } - return BaseT::getCastInstrCost(Opcode, Dst, Src, I); + return BaseT::getCastInstrCost(Opcode, Dst, Src, CostKind, I); } int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, + TTI::TargetCostKind CostKind, const Instruction *I) { // Legalize the type. 
std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); @@ -2214,7 +2226,7 @@ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) return LT.first * (ExtraCost + Entry->Cost); - return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I); + return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); } unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } @@ -2222,6 +2234,7 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed, + TTI::TargetCostKind CostKind, const Instruction *I) { // Costs should match the codegen from: // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll @@ -2682,12 +2695,14 @@ } return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF, - ScalarizationCostPassed, I); + ScalarizationCostPassed, CostKind, I); } int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef Args, FastMathFlags FMF, - unsigned VF, const Instruction *I) { + unsigned VF, + TTI::TargetCostKind CostKind, + const Instruction *I) { static const CostTblEntry AVX512CostTbl[] = { { ISD::ROTL, MVT::v8i64, 1 }, { ISD::ROTL, MVT::v4i64, 1 }, @@ -2777,7 +2792,7 @@ return LT.first * Entry->Cost; } - return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, I); + return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF, VF, CostKind, I); } int X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { @@ -2933,6 +2948,7 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, const Instruction *I) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { @@ -2953,7 +2969,7 @@ if (!isPowerOf2_32(NumElem)) { APInt DemandedElts = APInt::getAllOnesValue(NumElem); int Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(), Alignment, - AddressSpace); + AddressSpace, CostKind); int SplitCost = getScalarizationOverhead(Src, DemandedElts, Opcode == Instruction::Load, Opcode == Instruction::Store); @@ -2979,14 +2995,16 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, unsigned Alignment, - unsigned AddressSpace) { + unsigned AddressSpace, + TTI::TargetCostKind CostKind) { bool IsLoad = (Instruction::Load == Opcode); bool IsStore = (Instruction::Store == Opcode); VectorType *SrcVTy = dyn_cast(SrcTy); if (!SrcVTy) // To calculate scalar take the regular cost, without mask - return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace); + return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace, + CostKind); unsigned NumElem = SrcVTy->getNumElements(); VectorType *MaskTy = @@ -2999,14 +3017,16 @@ int MaskSplitCost = getScalarizationOverhead(MaskTy, DemandedElts, false, true); int ScalarCompareCost = getCmpSelInstrCost( - Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr); - int BranchCost = getCFInstrCost(Instruction::Br); + Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr, + CostKind); + int BranchCost = getCFInstrCost(Instruction::Br, CostKind); int MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); int ValueSplitCost = getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore); int MemopCost = NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; } @@ -3061,10 
+3081,11 @@ } int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwise) { + bool IsPairwise, + TTI::TargetCostKind CostKind) { // Just use the default implementation for pair reductions. if (IsPairwise) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind); // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput // and make it as the cost. @@ -3134,7 +3155,7 @@ // Type needs to be split. We need LT.first - 1 arithmetic ops. VectorType *SingleOpTy = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); + ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); ArithmeticCost *= LT.first - 1; } @@ -3204,7 +3225,7 @@ // Type needs to be split. We need LT.first - 1 arithmetic ops. Type *SingleOpTy = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); + ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); ArithmeticCost *= LT.first - 1; } @@ -3221,7 +3242,8 @@ if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) return ArithmeticCost + Entry->Cost; - return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise, + CostKind); } unsigned NumVecElts = ValVTy->getNumElements(); @@ -3230,7 +3252,8 @@ // Special case power of 2 reductions where the scalar type isn't changed // by type legalization. if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise, + CostKind); unsigned ReductionCost = 0; @@ -3239,7 +3262,7 @@ MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 arithmetic ops. Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - ReductionCost = getArithmeticInstrCost(Opcode, Ty); + ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind); ReductionCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); } @@ -3279,13 +3302,14 @@ auto *ShiftTy = VectorType::get( Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size); ReductionCost += getArithmeticInstrCost( - Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue, + Instruction::LShr, ShiftTy, CostKind, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } // Add the arithmetic op for this level. - ReductionCost += getArithmeticInstrCost(Opcode, Ty); + ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind); } // Add the final extract element to the cost. @@ -3409,16 +3433,19 @@ CmpOpcode = Instruction::ICmp; } + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // Otherwise fall back to cmp+select. 
- return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) + - getCmpSelInstrCost(Instruction::Select, Ty, CondTy, nullptr); + return getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + + getCmpSelInstrCost(Instruction::Select, Ty, CondTy, CostKind); } int X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, - bool IsPairwise, bool IsUnsigned) { + bool IsPairwise, bool IsUnsigned, + TTI::TargetCostKind CostKind) { // Just use the default implementation for pair reductions. if (IsPairwise) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, + CostKind); std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); @@ -3534,7 +3561,8 @@ // by type legalization. if (!isPowerOf2_32(ValVTy->getNumElements()) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); + return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned, + CostKind); // Now handle reduction with the legal type, taking into account size changes // at each level. @@ -3571,7 +3599,8 @@ VectorType *ShiftTy = VectorType::get( Type::getIntNTy(ValTy->getContext(), Size), 128 / Size); MinMaxCost += getArithmeticInstrCost( - Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue, + Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_UniformConstantValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); } @@ -3599,7 +3628,8 @@ return 2 * TTI::TCC_Basic; } -int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) { +int X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3634,7 +3664,7 @@ } int X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, - Type *Ty) { + Type *Ty, TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3721,17 +3751,18 @@ if (Idx == ImmIdx) { int NumConstants = divideCeil(BitSize, 64); - int Cost = X86TTIImpl::getIntImmCost(Imm, Ty); + int Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); return (Cost <= NumConstants * TTI::TCC_Basic) ? static_cast(TTI::TCC_Free) : Cost; } - return X86TTIImpl::getIntImmCost(Imm, Ty); + return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); } int X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, - const APInt &Imm, Type *Ty) { + const APInt &Imm, Type *Ty, + TTI::TargetCostKind CostKind) { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); @@ -3762,7 +3793,7 @@ return TTI::TCC_Free; break; } - return X86TTIImpl::getIntImmCost(Imm, Ty); + return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); } unsigned @@ -3842,7 +3873,8 @@ ? ST->getGatherOverhead() : ST->getScatterOverhead(); return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + TTI::TCK_RecipThroughput); } /// Return the cost of full scalarization of gather / scatter operation. 
@@ -3858,6 +3890,7 @@ unsigned AddressSpace) { unsigned VF = cast<VectorType>(SrcVTy)->getNumElements(); APInt DemandedElts = APInt::getAllOnesValue(VF); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; int MaskUnpackCost = 0; if (VariableMask) { @@ -3867,14 +3900,15 @@ getScalarizationOverhead(MaskTy, DemandedElts, false, true); int ScalarCompareCost = getCmpSelInstrCost(Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), - nullptr); - int BranchCost = getCFInstrCost(Instruction::Br); + nullptr, CostKind); + int BranchCost = getCFInstrCost(Instruction::Br, CostKind); MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); } // The cost of the scalar loads/stores. int MemoryOpCost = VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); int InsertExtractCost = 0; if (Opcode == Instruction::Load) @@ -3892,10 +3926,11 @@ } /// Calculate the cost of Gather / Scatter operation -int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, - Value *Ptr, bool VariableMask, - unsigned Alignment, - const Instruction *I = nullptr) { +int X86TTIImpl::getGatherScatterOpCost( + unsigned Opcode, Type *SrcVTy, Value *Ptr, bool VariableMask, + unsigned Alignment, TTI::TargetCostKind CostKind, + const Instruction *I = nullptr) { + assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); unsigned VF = cast<VectorType>(SrcVTy)->getNumElements(); PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); @@ -4177,19 +4212,21 @@ ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); // We currently Support only fully-interleaved groups, with no gaps. // TODO: Support also strided loads (interleaved-groups with gaps). if (Indices.size() && Indices.size() != Factor) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); // VecTy for interleave memop is <VF*Factor x Elt>. // So, for VF=4, Interleave Factor = 3, Element type = i32 we have @@ -4201,7 +4238,8 @@ // (see MachineValueType.h::getVectorVT()). if (!LegalVT.isVector()) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor; Type *ScalarTy = cast<VectorType>(VecTy)->getElementType(); @@ -4217,13 +4255,15 @@ VectorType::get(cast<VectorType>(VecTy)->getElementType(), LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); VectorType *VT = VectorType::get(ScalarTy, VF); EVT ETy = TLI->getValueType(DL, VT); if (!ETy.isSimple()) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, + CostKind); // TODO: Complete for other data-types and strides. // Each combination of Stride, ElementTy and VF results in a different @@ -4282,7 +4322,7 @@ } return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace); + Alignment, AddressSpace, CostKind); } // Get estimation for interleaved load/store operations and strided load.
@@ -4294,12 +4334,13 @@ ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { if (UseMaskForCond || UseMaskForGaps) return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); // VecTy for interleave memop is <VF*Factor x Elt>. @@ -4318,7 +4359,8 @@ VectorType::get(cast<VectorType>(VecTy)->getElementType(), LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, - MaybeAlign(Alignment), AddressSpace); + MaybeAlign(Alignment), AddressSpace, + CostKind); unsigned VF = cast<VectorType>(VecTy)->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); @@ -4421,6 +4463,7 @@ ArrayRef<unsigned> Indices, unsigned Alignment, unsigned AddressSpace, + TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { @@ -4434,14 +4477,14 @@ }; if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); if (ST->hasAVX2()) return getInterleavedMemoryOpCostAVX2(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, - Alignment, AddressSpace, + Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp --- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -363,10 +363,12 @@ // instruction and operand index. if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst)) Cost = TTI->getIntImmCostIntrin(IntrInst->getIntrinsicID(), Idx, - ConstInt->getValue(), ConstInt->getType()); + ConstInt->getValue(), ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency); else Cost = TTI->getIntImmCostInst(Inst->getOpcode(), Idx, ConstInt->getValue(), - ConstInt->getType()); + ConstInt->getType(), + TargetTransformInfo::TCK_SizeAndLatency); // Ignore cheap integer constants. if (Cost > TargetTransformInfo::TCC_Basic) { @@ -416,7 +418,8 @@ // usually lowered to a load from constant pool. Such operation is unlikely // to be cheaper than compute it by <Base + Offset>, which can be lowered to // an ADD instruction or folded into Load/Store instruction.
- int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy); + int Cost = TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy, + TargetTransformInfo::TCK_SizeAndLatency); ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV]; ConstCandMapType::iterator Itr; bool Inserted; @@ -582,7 +585,8 @@ for (auto User : ConstCand->Uses) { unsigned Opcode = User.Inst->getOpcode(); unsigned OpndIdx = User.OpndIdx; - Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty); + Cost += TTI->getIntImmCostInst(Opcode, OpndIdx, Value, Ty, + TargetTransformInfo::TCK_SizeAndLatency); LLVM_DEBUG(dbgs() << "Cost: " << Cost << "\n"); for (auto C2 = S; C2 != E; ++C2) { diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1990,7 +1990,9 @@ "non noop cast is found during rematerialization"); Type *SrcTy = CI->getOperand(0)->getType(); - Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI); + Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, + TargetTransformInfo::TCK_SizeAndLatency, + CI); } else if (GetElementPtrInst *GEP = dyn_cast(Instr)) { // Cost of the address calculation diff --git a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp --- a/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculateAroundPHIs.cpp @@ -232,7 +232,8 @@ continue; int &MatCost = InsertResult.first->second.MatCost; - MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType()); + MatCost = TTI.getIntImmCost(IncomingC->getValue(), IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); NonFreeMat |= MatCost != TTI.TCC_Free; } if (!NonFreeMat) { @@ -283,12 +284,15 @@ int MatCost = IncomingConstantAndCostsAndCount.second.MatCost; int &FoldedCost = IncomingConstantAndCostsAndCount.second.FoldedCost; if (IID) - FoldedCost += TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), - IncomingC->getType()); + FoldedCost += + TTI.getIntImmCostIntrin(IID, Idx, IncomingC->getValue(), + IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); else FoldedCost += TTI.getIntImmCostInst(UserI->getOpcode(), Idx, - IncomingC->getValue(), IncomingC->getType()); + IncomingC->getValue(), IncomingC->getType(), + TargetTransformInfo::TCK_SizeAndLatency); // If we accumulate more folded cost for this incoming constant than // materialized cost, then we'll regress any edge with this constant so diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3277,7 +3277,8 @@ // to be vectors, so we need to extract individual elements from there, // execute VF scalar calls, and then gather the result into the vector return // value. - unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys); + unsigned ScalarCallCost = TTI.getCallInstrCost(F, ScalarRetTy, ScalarTys, + TTI::TCK_RecipThroughput); if (VF == 1) return ScalarCallCost; @@ -3302,7 +3303,8 @@ return Cost; // If the corresponding vector cost is cheaper, return its cost. 
- unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys); + unsigned VectorCallCost = TTI.getCallInstrCost(nullptr, RetTy, Tys, + TTI::TCK_RecipThroughput); if (VectorCallCost < Cost) { NeedToScalarize = false; return VectorCallCost; @@ -3320,7 +3322,9 @@ FMF = FPMO->getFastMathFlags(); SmallVector Operands(CI->arg_operands()); - return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, CI); + return TTI.getIntrinsicInstrCost(ID, CI->getType(), Operands, FMF, VF, + TargetTransformInfo::TCK_RecipThroughput, + CI); } static Type *smallestIntegerVectorType(Type *T1, Type *T2) { @@ -5832,7 +5836,8 @@ // vectorized loop where the user of it is a vectorized instruction. const MaybeAlign Alignment = getLoadStoreAlignment(I); Cost += VF * TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(), - Alignment, AS); + Alignment, AS, + TTI::TCK_RecipThroughput); // Get the overhead of the extractelement and insertelement instructions // we might create due to scalarization. @@ -5860,6 +5865,7 @@ Value *Ptr = getLoadStorePointerOperand(I); unsigned AS = getLoadStoreAddressSpace(I); int ConsecutiveStride = Legal->isConsecutivePtr(Ptr); + enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && "Stride should be 1 or -1 for consecutive memory access"); @@ -5867,9 +5873,11 @@ unsigned Cost = 0; if (Legal->isMaskRequired(I)) Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, - Alignment ? Alignment->value() : 0, AS); + Alignment ? Alignment->value() : 0, AS, + CostKind); else - Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, I); + Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS, + CostKind, I); bool Reverse = ConsecutiveStride < 0; if (Reverse) @@ -5883,16 +5891,19 @@ auto *VectorTy = cast(ToVectorTy(ValTy, VF)); const MaybeAlign Alignment = getLoadStoreAlignment(I); unsigned AS = getLoadStoreAddressSpace(I); + enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; if (isa(I)) { return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS, + CostKind) + TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); } StoreInst *SI = cast(I); bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand()); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS) + + TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS, + CostKind) + (isLoopInvariantStoreValue ? 0 : TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy, @@ -5909,7 +5920,9 @@ return TTI.getAddressComputationCost(VectorTy) + TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), - Alignment ? Alignment->value() : 0, I); + Alignment ? Alignment->value() : 0, + TargetTransformInfo::TCK_RecipThroughput, + I); } unsigned LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, @@ -5938,7 +5951,8 @@ Group->requiresScalarEpilogue() && !isScalarEpilogueAllowed(); unsigned Cost = TTI.getInterleavedMemoryOpCost( I->getOpcode(), WideVecTy, Group->getFactor(), Indices, - Group->getAlign().value(), AS, Legal->isMaskRequired(I), UseMaskForGaps); + Group->getAlign().value(), AS, TTI::TCK_RecipThroughput, + Legal->isMaskRequired(I), UseMaskForGaps); if (Group->isReverse()) { // TODO: Add support for reversed masked interleaved access. 
@@ -5960,7 +5974,8 @@ unsigned AS = getLoadStoreAddressSpace(I); return TTI.getAddressComputationCost(ValTy) + - TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, I); + TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, + TTI::TCK_RecipThroughput, I); } return getWideningCost(I, VF); } @@ -6182,6 +6197,7 @@ RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // TODO: We need to estimate the cost of intrinsic calls. switch (I->getOpcode()) { @@ -6238,7 +6254,8 @@ return (Phi->getNumIncomingValues() - 1) * TTI.getCmpSelInstrCost( Instruction::Select, ToVectorTy(Phi->getType(), VF), - ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF)); + ToVectorTy(Type::getInt1Ty(Phi->getContext()), VF), + CostKind); return TTI.getCFInstrCost(Instruction::PHI); } @@ -6260,7 +6277,7 @@ Cost += VF * TTI.getCFInstrCost(Instruction::PHI); // The cost of the non-predicated instruction. - Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy); + Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy, CostKind); // The cost of insertelement and extractelement instructions needed for // scalarization. @@ -6301,13 +6318,15 @@ SmallVector Operands(I->operand_values()); unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, + I->getOpcode(), VectorTy, CostKind, + TargetTransformInfo::OK_AnyValue, Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I); } case Instruction::FNeg: { unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; return N * TTI.getArithmeticInstrCost( - I->getOpcode(), VectorTy, TargetTransformInfo::OK_AnyValue, + I->getOpcode(), VectorTy, CostKind, + TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None, TargetTransformInfo::OP_None, I->getOperand(0), I); @@ -6320,7 +6339,8 @@ if (!ScalarCond) CondTy = VectorType::get(CondTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, I); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, CondTy, + CostKind, I); } case Instruction::ICmp: case Instruction::FCmp: { @@ -6329,7 +6349,8 @@ if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF)) ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]); VectorTy = ToVectorTy(ValTy, VF); - return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, I); + return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy, nullptr, CostKind, + I); } case Instruction::Store: case Instruction::Load: { @@ -6362,7 +6383,7 @@ if (isOptimizableIVTruncate(I, VF)) { auto *Trunc = cast(I); return TTI.getCastInstrCost(Instruction::Trunc, Trunc->getDestTy(), - Trunc->getSrcTy(), Trunc); + Trunc->getSrcTy(), CostKind, Trunc); } Type *SrcScalarTy = I->getOperand(0)->getType(); @@ -6388,7 +6409,8 @@ } unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; - return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, I); + return N * TTI.getCastInstrCost(I->getOpcode(), VectorTy, SrcVecTy, + CostKind, I); } case Instruction::Call: { bool NeedToScalarize; @@ -6401,7 +6423,8 @@ default: // The cost of executing VF copies of the scalar instruction. This opcode // is unknown. Assume that it is the same as 'mul'. 
- return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) + + return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, + CostKind) + getScalarizationOverhead(I, VF); } // end of switch. } diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3259,7 +3259,8 @@ VectorType::get(Arg->getType(), VecTy->getNumElements())); // If the corresponding vector call is cheaper, return its cost. - LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys); + LibCost = TTI->getCallInstrCost(nullptr, VecTy, VecTys, + TTI::TCK_RecipThroughput); } return {IntrinsicCost, LibCost}; } @@ -3273,6 +3274,7 @@ else if (CmpInst *CI = dyn_cast(VL[0])) ScalarTy = CI->getOperand(0)->getType(); VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // If we have computed a smaller type for the expression, update VecTy so // that the costs will be accurate. @@ -3380,7 +3382,8 @@ Ext->getOpcode(), Ext->getType(), VecTy, i); // Add back the cost of s|zext which is subtracted separately. DeadCost += TTI->getCastInstrCost( - Ext->getOpcode(), Ext->getType(), E->getType(), Ext); + Ext->getOpcode(), Ext->getType(), E->getType(), CostKind, + Ext); continue; } } @@ -3404,7 +3407,8 @@ case Instruction::BitCast: { Type *SrcTy = VL0->getOperand(0)->getType(); int ScalarEltCost = - TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, CostKind, + VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -3417,7 +3421,8 @@ // Check if the values are candidates to demote. if (!MinBWs.count(VL0) || VecTy != SrcVecTy) { VecCost = ReuseShuffleCost + - TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0); + TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, + CostKind, VL0); } return VecCost - ScalarCost; } @@ -3426,13 +3431,15 @@ case Instruction::Select: { // Calculate the cost of this instruction. 
int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy, - Builder.getInt1Ty(), VL0); + Builder.getInt1Ty(), + CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * ScalarEltCost; - int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0); + int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, + CostKind, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::FNeg: @@ -3493,13 +3500,15 @@ SmallVector Operands(VL0->operand_values()); int ScalarEltCost = TTI->getArithmeticInstrCost( - E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); + E->getOpcode(), ScalarTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, + Operands, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; int VecCost = TTI->getArithmeticInstrCost( - E->getOpcode(), VecTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands, VL0); + E->getOpcode(), VecTy, CostKind, Op1VK, Op2VK, Op1VP, Op2VP, + Operands, VL0); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::GetElementPtr: { @@ -3509,26 +3518,30 @@ TargetTransformInfo::OK_UniformConstantValue; int ScalarEltCost = - TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK); + TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, CostKind, + Op1VK, Op2VK); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarCost = VecTy->getNumElements() * ScalarEltCost; int VecCost = - TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK); + TTI->getArithmeticInstrCost(Instruction::Add, VecTy, CostKind, + Op1VK, Op2VK); return ReuseShuffleCost + VecCost - ScalarCost; } case Instruction::Load: { // Cost of wide load - cost of scalar loads. MaybeAlign alignment(cast(VL0)->getAlignment()); int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Load, ScalarTy, alignment, 0, + CostKind, VL0); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } int ScalarLdCost = VecTy->getNumElements() * ScalarEltCost; int VecLdCost = - TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Load, VecTy, alignment, 0, + CostKind, VL0); if (!E->ReorderIndices.empty()) { // TODO: Merge this shuffle with the ReuseShuffleCost. VecLdCost += TTI->getShuffleCost( @@ -3543,12 +3556,13 @@ cast(IsReorder ? VL[E->ReorderIndices.front()] : VL0); MaybeAlign Alignment(SI->getAlignment()); int ScalarEltCost = - TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, VL0); + TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0, + CostKind, VL0); if (NeedToShuffleReuses) ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost; int ScalarStCost = VecTy->getNumElements() * ScalarEltCost; int VecStCost = TTI->getMemoryOpCost(Instruction::Store, - VecTy, Alignment, 0, VL0); + VecTy, Alignment, 0, CostKind, VL0); if (IsReorder) { // TODO: Merge this shuffle with the ReuseShuffleCost. 
VecStCost += TTI->getShuffleCost( @@ -3570,7 +3584,7 @@ FMF = FPMO->getFastMathFlags(); int ScalarEltCost = - TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF); + TTI->getIntrinsicInstrCost(ID, ScalarTy, ScalarTys, FMF, 1, CostKind); if (NeedToShuffleReuses) { ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost; } @@ -3596,34 +3610,34 @@ if (NeedToShuffleReuses) { for (unsigned Idx : E->ReuseShuffleIndices) { Instruction *I = cast(VL[Idx]); - ReuseShuffleCost -= TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ReuseShuffleCost -= TTI->getInstructionCost(I, CostKind); } for (Value *V : VL) { Instruction *I = cast(V); - ReuseShuffleCost += TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ReuseShuffleCost += TTI->getInstructionCost(I, CostKind); } } for (Value *V : VL) { Instruction *I = cast(V); assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode"); - ScalarCost += TTI->getInstructionCost( - I, TargetTransformInfo::TCK_RecipThroughput); + ScalarCost += TTI->getInstructionCost(I, CostKind); } // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. int VecCost = 0; if (Instruction::isBinaryOp(E->getOpcode())) { - VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy); - VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy); + VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind); + VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, + CostKind); } else { Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size()); VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size()); - VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty); - VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty); + VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + CostKind); + VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + CostKind); } VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost;