diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1182,9 +1182,9 @@ /// Split: /// (v0, v1, v2, v3) /// ((v0+v2), (v1+v3), undef, undef) - int getArithmeticReductionCost( - unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; int getMinMaxReductionCost( VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, @@ -1606,9 +1606,10 @@ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; - virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) = 0; + virtual InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) = 0; virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned, TTI::TargetCostKind CostKind) = 0; @@ -2096,9 +2097,10 @@ Alignment, AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); } - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) override { + InstructionCost + getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) override { return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, CostKind); } diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -616,8 +616,8 @@ return 0; } - unsigned getArithmeticReductionCost(unsigned, VectorType *, bool, - TTI::TargetCostKind) const { + InstructionCost getArithmeticReductionCost(unsigned, VectorType *, bool, + TTI::TargetCostKind) const { return 1; } diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1937,9 +1937,9 @@ /// /// The cost model should take into account that the actual length of the /// vector is reduced on each iteration. - unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwise, - TTI::TargetCostKind CostKind) { + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + bool IsPairwise, + TTI::TargetCostKind CostKind) { Type *ScalarTy = Ty->getElementType(); unsigned NumVecElts = cast(Ty)->getNumElements(); if ((Opcode == Instruction::Or || Opcode == Instruction::And) && @@ -1952,10 +1952,8 @@ // %val = bitcast to iReduxWidth // %res = cmp eq iReduxWidth %val, 11111 Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts); - return *thisT() - ->getCastInstrCost(Instruction::BitCast, ValTy, Ty, - TTI::CastContextHint::None, CostKind) - .getValue() + + return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty, + TTI::CastContextHint::None, CostKind) + thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy, CmpInst::makeCmpResultType(ValTy), CmpInst::BAD_ICMP_PREDICATE, CostKind); @@ -2075,7 +2073,7 @@ // Without any native support, this is equivalent to the cost of // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext)) VectorType *ExtTy = VectorType::get(ResTy, Ty); - unsigned RedCost = thisT()->getArithmeticReductionCost( + InstructionCost RedCost = thisT()->getArithmeticReductionCost( Instruction::Add, ExtTy, false, CostKind); InstructionCost MulCost = 0; InstructionCost ExtCost = thisT()->getCastInstrCost( diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -893,12 +893,11 @@ return Cost; } -int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, - VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) const { - int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, - CostKind); +InstructionCost TargetTransformInfo::getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, + TTI::TargetCostKind CostKind) const { + InstructionCost Cost = + TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -150,9 +150,10 @@ bool IsPairwise, bool IsUnsigned, TTI::TargetCostKind CostKind); - int getArithmeticReductionCostSVE(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind); + InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, + VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind); int getArithmeticInstrCost( unsigned Opcode, Type *Ty, @@ -278,9 +279,9 @@ bool isLegalToVectorizeReduction(RecurrenceDescriptor RdxDesc, ElementCount VF) const; - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); int getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef Mask, int Index, VectorType *SubTp); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1271,7 +1271,7 @@ return LegalizationCost + /*Cost of horizontal reduction*/ 2; } -int AArch64TTIImpl::getArithmeticReductionCostSVE( +InstructionCost AArch64TTIImpl::getArithmeticReductionCostSVE( unsigned Opcode, VectorType *ValTy, bool IsPairwise, TTI::TargetCostKind CostKind) { assert(!IsPairwise && "Cannot be pair wise to continue"); @@ -1295,16 +1295,14 @@ case ISD::FADD: return LegalizationCost + 2; default: - // TODO: Replace for invalid when InstructionCost is used - // cases not supported by SVE - return 16; + return InstructionCost::getInvalid(); } } -int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, - VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) { +InstructionCost +AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { if (isa(ValTy)) return getArithmeticReductionCostSVE(Opcode, ValTy, IsPairwiseForm, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -209,10 +209,8 @@ int getInlinerVectorBonusPercent() { return 0; } - int getArithmeticReductionCost( - unsigned Opcode, - VectorType *Ty, - bool IsPairwise, + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwise, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput); InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -846,9 +846,10 @@ return BaseT::getCFInstrCost(Opcode, CostKind, I); } -int GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwise, - TTI::TargetCostKind CostKind) { +InstructionCost +GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, + bool IsPairwise, + TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -247,9 +247,9 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind); + InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind); InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *ValTy, TTI::TargetCostKind CostKind); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1584,9 +1584,10 @@ return ScalarCost; } -int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind) { +InstructionCost +ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { EVT ValVT = TLI->getValueType(DL, ValTy); int ISD = TLI->InstructionOpcodeToISD(Opcode); if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -175,9 +175,9 @@ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, - bool IsPairwiseForm, - TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); + InstructionCost getArithmeticReductionCost( + unsigned Opcode, VectorType *Ty, bool IsPairwiseForm, + TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency); int getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3340,9 +3340,10 @@ return BaseT::getAddressComputationCost(Ty, SE, Ptr); } -int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, - bool IsPairwise, - TTI::TargetCostKind CostKind) { +InstructionCost +X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwise, + TTI::TargetCostKind CostKind) { // Just use the default implementation for pair reductions. if (IsPairwise) return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise, CostKind); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -6990,8 +6990,8 @@ RecurrenceDescriptor RdxDesc = Legal->getReductionVars()[cast(ReductionPhi)]; - unsigned BaseCost = TTI.getArithmeticReductionCost(RdxDesc.getOpcode(), - VectorTy, false, CostKind); + InstructionCost BaseCost = TTI.getArithmeticReductionCost( + RdxDesc.getOpcode(), VectorTy, false, CostKind); // Get the operand that was not the reduction chain and match it to one of the // patterns, returning the better cost if it is found. diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -22,8 +22,8 @@ ; CHECK-LABEL: 'reductions' ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64( %v1) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32( %v0) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64( %v1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32( %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64( %v1) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32( %v0)