Index: llvm/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1394,8 +1394,7 @@ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; InstructionCost getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF = FastMathFlags(), + Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(), TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// Calculate the cost of an extended reduction pattern, similar to @@ -1941,8 +1940,8 @@ std::optional FMF, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost - getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF, TTI::TargetCostKind CostKind) = 0; + getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, + TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, @@ -2562,10 +2561,9 @@ return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); } InstructionCost - getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF, + getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) override { - return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind); } InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h =================================================================== --- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -726,7 +726,7 @@ return 1; } - InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const { return 1; Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1887,17 +1887,23 @@ return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, FMF, CostKind); case Intrinsic::vector_reduce_smax: + return thisT()->getMinMaxReductionCost(Intrinsic::smax, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_fmax: - case Intrinsic::vector_reduce_fmin: - return thisT()->getMinMaxReductionCost( - VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), - /*IsUnsigned=*/false, ICA.getFlags(), CostKind); + return thisT()->getMinMaxReductionCost(Intrinsic::smin, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_umax: + return thisT()->getMinMaxReductionCost(Intrinsic::umax, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_umin: - return thisT()->getMinMaxReductionCost( - VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), - /*IsUnsigned=*/true, ICA.getFlags(), CostKind); + return thisT()->getMinMaxReductionCost(Intrinsic::umin, VecOpTy, + ICA.getFlags(), CostKind); + case Intrinsic::vector_reduce_fmax: + return thisT()->getMinMaxReductionCost(Intrinsic::maxnum, VecOpTy, + ICA.getFlags(), CostKind); + case Intrinsic::vector_reduce_fmin: + return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::abs: { // abs(X) = select(icmp(X,0),X,sub(0,X)) Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -2345,8 +2351,8 @@ /// Try to calculate op costs for min/max reduction operations. /// \param CondTy Conditional type for the Select instruction. - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { // Targets must implement a default value for the scalable case, since // we don't know how many lanes the vector has. @@ -2354,17 +2360,8 @@ return InstructionCost::getInvalid(); Type *ScalarTy = Ty->getElementType(); - Type *ScalarCondTy = CondTy->getElementType(); unsigned NumVecElts = cast(Ty)->getNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - unsigned CmpOpcode; - if (Ty->isFPOrFPVectorTy()) { - CmpOpcode = Instruction::FCmp; - } else { - assert(Ty->isIntOrIntVectorTy() && - "expecting floating point or integer type for min/max reduction"); - CmpOpcode = Instruction::ICmp; - } InstructionCost MinMaxCost = 0; InstructionCost ShuffleCost = 0; std::pair LT = thisT()->getTypeLegalizationCost(Ty); @@ -2374,16 +2371,13 @@ while (NumVecElts > MVTLen) { NumVecElts /= 2; auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); - CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, CostKind, NumVecElts, SubTy); - MinMaxCost += - thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind) + - thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + + IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF); + MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind); Ty = SubTy; ++LongVectorCount; } @@ -2397,12 +2391,8 @@ ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, std::nullopt, CostKind, 0, Ty); - MinMaxCost += - NumReduxLevels * - (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind) + - thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind)); + IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF); + MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind); // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + Index: llvm/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/lib/Analysis/TargetTransformInfo.cpp +++ llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1054,10 +1054,10 @@ } InstructionCost TargetTransformInfo::getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsUnsigned, FastMathFlags FMF, + Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const { InstructionCost Cost = - TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + TTIImpl->getMinMaxReductionCost(IID, Ty, FMF, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -179,8 +179,8 @@ TTI::TargetCostKind CostKind, unsigned Index); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3263,26 +3263,18 @@ } InstructionCost -AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); - - assert((isa(Ty) == isa(CondTy)) && - "Both vector needs to be equally scalable"); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); - Intrinsic::ID MinMaxOpcode = - Ty->isFPOrFPVectorTy() - ? Intrinsic::maxnum - : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin); - IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy}, - FMF); + IntrinsicCostAttributes Attrs(IID, LegalVTy, {LegalVTy, LegalVTy}, FMF); LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1); } Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -249,8 +249,8 @@ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); }; Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -789,15 +789,15 @@ } InstructionCost -GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support // 16-bit types only). if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); std::pair LT = getTypeLegalizationCost(Ty); return LT.first * getHalfRateInstrCost(CostKind); Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -148,8 +148,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Index: llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1212,15 +1212,15 @@ } InstructionCost -RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { if (isa(Ty) && !ST->useRVVForFixedLengthVectors()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); // Skip if scalar size of Ty is bigger than ELEN. if (Ty->getScalarSizeInBits() > ST->getELEN()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); std::pair LT = getTypeLegalizationCost(Ty); if (Ty->getElementType()->isIntegerTy(1)) Index: llvm/lib/Target/X86/X86TargetTransformInfo.h =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.h +++ llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -207,12 +207,12 @@ std::optional FMF, TTI::TargetCostKind CostKind); - InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, - TTI::TargetCostKind CostKind, bool IsUnsigned, + InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, + TTI::TargetCostKind CostKind, FastMathFlags FMF); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getInterleavedMemoryOpCost( Index: llvm/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5255,25 +5255,16 @@ CostKind, 0, nullptr, nullptr); } -InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, +InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, - bool IsUnsigned, FastMathFlags FMF) { - Intrinsic::ID Id; - if (Ty->isIntOrIntVectorTy()) { - Id = IsUnsigned ? Intrinsic::umin : Intrinsic::smin; - } else { - assert(Ty->isFPOrFPVectorTy() && - "Expected float point or integer vector type."); - Id = Intrinsic::minnum; - } - - IntrinsicCostAttributes ICA(Id, Ty, {Ty, Ty}, FMF); + FastMathFlags FMF) { + IntrinsicCostAttributes ICA(IID, Ty, {Ty, Ty}, FMF); return getIntrinsicInstrCost(ICA, CostKind); } InstructionCost -X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { std::pair LT = getTypeLegalizationCost(ValTy); @@ -5281,11 +5272,14 @@ int ISD; if (ValTy->isIntOrIntVectorTy()) { - ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; + ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ? ISD::UMIN + : ISD::SMIN; } else { assert(ValTy->isFPOrFPVectorTy() && "Expected float point or integer vector type."); - ISD = ISD::FMINNUM; + ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum) + ? ISD::FMINNUM + : ISD::FMINIMUM; } // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput @@ -5361,9 +5355,7 @@ // Type needs to be split. We need LT.first - 1 operations ops. Ty = FixedVectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(), - MTy.getVectorNumElements()); - MinMaxCost = getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF); + MinMaxCost = getMinMaxCost(IID, Ty, CostKind, FMF); MinMaxCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); } @@ -5390,8 +5382,7 @@ // by type legalization. if (!isPowerOf2_32(ValVTy->getNumElements()) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, FMF, - CostKind); + return BaseT::getMinMaxReductionCost(IID, ValTy, FMF, CostKind); // Now handle reduction with the legal type, taking into account size changes // at each level. @@ -5435,9 +5426,7 @@ } // Add the arithmetic op for this level. - auto *SubCondTy = - FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements()); - MinMaxCost += getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF); + MinMaxCost += getMinMaxCost(IID, Ty, CostKind, FMF); } // Add the final extract element to the cost. Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13796,15 +13796,9 @@ case RecurKind::SMin: case RecurKind::UMax: case RecurKind::UMin: { - if (!AllConsts) { - auto *VecCondTy = - cast(CmpInst::makeCmpResultType(VectorTy)); - bool IsUnsigned = - RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin; - VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, - IsUnsigned, FMF, CostKind); - } Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); + if (!AllConsts) + VectorCost = TTI->getMinMaxReductionCost(Id, VectorTy, FMF, CostKind); ScalarCost = EvaluateScalarCost([&]() { IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF); return TTI->getIntrinsicInstrCost(ICA, CostKind); Index: llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll =================================================================== --- llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll +++ llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll @@ -162,10 +162,10 @@ define void @reduce_fmin16() { ; CHECK-NOF16-LABEL: 'reduce_fmin16' -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef) @@ -196,10 +196,10 @@ define void @reduce_fmax16() { ; CHECK-NOF16-LABEL: 'reduce_fmax16' -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef) Index: llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll =================================================================== --- llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -294,19 +294,19 @@ define void @reduce_fmax(<16 x float> %va) { ; THRU-LABEL: 'reduce_fmax' -; THRU-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'reduce_fmax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)