diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1409,8 +1409,7 @@ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; InstructionCost getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF = FastMathFlags(), + Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(), TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; /// Calculate the cost of an extended reduction pattern, similar to @@ -1959,8 +1958,8 @@ std::optional FMF, TTI::TargetCostKind CostKind) = 0; virtual InstructionCost - getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF, TTI::TargetCostKind CostKind) = 0; + getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, + TTI::TargetCostKind CostKind) = 0; virtual InstructionCost getExtendedReductionCost( unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, @@ -2584,10 +2583,9 @@ return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); } InstructionCost - getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, - FastMathFlags FMF, + getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) override { - return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind); } InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -729,7 +729,7 @@ return 1; } - InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const { return 1; diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1890,17 +1890,23 @@ return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, FMF, CostKind); case Intrinsic::vector_reduce_smax: + return thisT()->getMinMaxReductionCost(Intrinsic::smax, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_smin: - case Intrinsic::vector_reduce_fmax: - case Intrinsic::vector_reduce_fmin: - return thisT()->getMinMaxReductionCost( - VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), - /*IsUnsigned=*/false, ICA.getFlags(), CostKind); + return thisT()->getMinMaxReductionCost(Intrinsic::smin, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_umax: + return thisT()->getMinMaxReductionCost(Intrinsic::umax, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::vector_reduce_umin: - return thisT()->getMinMaxReductionCost( - VecOpTy, cast(CmpInst::makeCmpResultType(VecOpTy)), - /*IsUnsigned=*/true, ICA.getFlags(), CostKind); + return thisT()->getMinMaxReductionCost(Intrinsic::umin, VecOpTy, + ICA.getFlags(), CostKind); + case Intrinsic::vector_reduce_fmax: + return thisT()->getMinMaxReductionCost(Intrinsic::maxnum, VecOpTy, + ICA.getFlags(), CostKind); + case Intrinsic::vector_reduce_fmin: + return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy, + ICA.getFlags(), CostKind); case Intrinsic::abs: { // abs(X) = select(icmp(X,0),X,sub(0,X)) Type *CondTy = RetTy->getWithNewBitWidth(1); @@ -2348,8 +2354,8 @@ /// Try to calculate op costs for min/max reduction operations. /// \param CondTy Conditional type for the Select instruction. - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { // Targets must implement a default value for the scalable case, since // we don't know how many lanes the vector has. @@ -2357,17 +2363,8 @@ return InstructionCost::getInvalid(); Type *ScalarTy = Ty->getElementType(); - Type *ScalarCondTy = CondTy->getElementType(); unsigned NumVecElts = cast(Ty)->getNumElements(); unsigned NumReduxLevels = Log2_32(NumVecElts); - unsigned CmpOpcode; - if (Ty->isFPOrFPVectorTy()) { - CmpOpcode = Instruction::FCmp; - } else { - assert(Ty->isIntOrIntVectorTy() && - "expecting floating point or integer type for min/max reduction"); - CmpOpcode = Instruction::ICmp; - } InstructionCost MinMaxCost = 0; InstructionCost ShuffleCost = 0; std::pair LT = thisT()->getTypeLegalizationCost(Ty); @@ -2377,16 +2374,13 @@ while (NumVecElts > MVTLen) { NumVecElts /= 2; auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); - CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt, CostKind, NumVecElts, SubTy); - MinMaxCost += - thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind) + - thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind); + + IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF); + MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind); Ty = SubTy; ++LongVectorCount; } @@ -2400,12 +2394,8 @@ ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, std::nullopt, CostKind, 0, Ty); - MinMaxCost += - NumReduxLevels * - (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind) + - thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, - CmpInst::BAD_ICMP_PREDICATE, CostKind)); + IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF); + MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind); // The last min/max should be in vector registers and we counted it above. // So just need a single extractelement. return ShuffleCost + MinMaxCost + diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1058,10 +1058,10 @@ } InstructionCost TargetTransformInfo::getMinMaxReductionCost( - VectorType *Ty, VectorType *CondTy, bool IsUnsigned, FastMathFlags FMF, + Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const { InstructionCost Cost = - TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + TTIImpl->getMinMaxReductionCost(IID, Ty, FMF, CostKind); assert(Cost >= 0 && "TTI should not produce negative costs!"); return Cost; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -181,8 +181,8 @@ TTI::TargetCostKind CostKind, unsigned Index); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCostSVE(unsigned Opcode, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -3274,26 +3274,18 @@ } InstructionCost -AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { std::pair LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); - - assert((isa(Ty) == isa(CondTy)) && - "Both vector needs to be equally scalable"); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); InstructionCost LegalizationCost = 0; if (LT.first > 1) { Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext()); - Intrinsic::ID MinMaxOpcode = - Ty->isFPOrFPVectorTy() - ? Intrinsic::maxnum - : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin); - IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy}, - FMF); + IntrinsicCostAttributes Attrs(IID, LegalVTy, {LegalVTy, LegalVTy}, FMF); LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -251,8 +251,8 @@ InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -787,15 +787,15 @@ } InstructionCost -GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { EVT OrigTy = TLI->getValueType(DL, Ty); // Computes cost on targets that have packed math instructions(which support // 16-bit types only). if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); std::pair LT = getTypeLegalizationCost(Ty); return LT.first * getHalfRateInstrCost(CostKind); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -148,8 +148,8 @@ TTI::TargetCostKind CostKind, const Instruction *I = nullptr); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1212,15 +1212,15 @@ } InstructionCost -RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { if (isa(Ty) && !ST->useRVVForFixedLengthVectors()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); // Skip if scalar size of Ty is bigger than ELEN. if (Ty->getScalarSizeInBits() > ST->getELEN()) - return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind); + return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind); std::pair LT = getTypeLegalizationCost(Ty); if (Ty->getElementType()->isIntegerTy(1)) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -207,12 +207,12 @@ std::optional FMF, TTI::TargetCostKind CostKind); - InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, - TTI::TargetCostKind CostKind, bool IsUnsigned, + InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty, + TTI::TargetCostKind CostKind, FastMathFlags FMF); - InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, + InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, + FastMathFlags FMF, TTI::TargetCostKind CostKind); InstructionCost getInterleavedMemoryOpCost( diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -5256,25 +5256,16 @@ CostKind, 0, nullptr, nullptr); } -InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, +InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty, TTI::TargetCostKind CostKind, - bool IsUnsigned, FastMathFlags FMF) { - Intrinsic::ID Id; - if (Ty->isIntOrIntVectorTy()) { - Id = IsUnsigned ? Intrinsic::umin : Intrinsic::smin; - } else { - assert(Ty->isFPOrFPVectorTy() && - "Expected float point or integer vector type."); - Id = Intrinsic::minnum; - } - - IntrinsicCostAttributes ICA(Id, Ty, {Ty, Ty}, FMF); + FastMathFlags FMF) { + IntrinsicCostAttributes ICA(IID, Ty, {Ty, Ty}, FMF); return getIntrinsicInstrCost(ICA, CostKind); } InstructionCost -X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, - bool IsUnsigned, FastMathFlags FMF, +X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy, + FastMathFlags FMF, TTI::TargetCostKind CostKind) { std::pair LT = getTypeLegalizationCost(ValTy); @@ -5282,11 +5273,14 @@ int ISD; if (ValTy->isIntOrIntVectorTy()) { - ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; + ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ? ISD::UMIN + : ISD::SMIN; } else { assert(ValTy->isFPOrFPVectorTy() && "Expected float point or integer vector type."); - ISD = ISD::FMINNUM; + ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum) + ? ISD::FMINNUM + : ISD::FMINIMUM; } // We use the Intel Architecture Code Analyzer(IACA) to measure the throughput @@ -5362,9 +5356,7 @@ // Type needs to be split. We need LT.first - 1 operations ops. Ty = FixedVectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); - auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(), - MTy.getVectorNumElements()); - MinMaxCost = getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF); + MinMaxCost = getMinMaxCost(IID, Ty, CostKind, FMF); MinMaxCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); } @@ -5391,8 +5383,7 @@ // by type legalization. if (!isPowerOf2_32(ValVTy->getNumElements()) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, FMF, - CostKind); + return BaseT::getMinMaxReductionCost(IID, ValTy, FMF, CostKind); // Now handle reduction with the legal type, taking into account size changes // at each level. @@ -5436,9 +5427,7 @@ } // Add the arithmetic op for this level. - auto *SubCondTy = - FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements()); - MinMaxCost += getMinMaxCost(Ty, SubCondTy, CostKind, IsUnsigned, FMF); + MinMaxCost += getMinMaxCost(IID, Ty, CostKind, FMF); } // Add the final extract element to the cost. diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -13804,15 +13804,9 @@ case RecurKind::SMin: case RecurKind::UMax: case RecurKind::UMin: { - if (!AllConsts) { - auto *VecCondTy = - cast(CmpInst::makeCmpResultType(VectorTy)); - bool IsUnsigned = - RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin; - VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy, - IsUnsigned, FMF, CostKind); - } Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind); + if (!AllConsts) + VectorCost = TTI->getMinMaxReductionCost(Id, VectorTy, FMF, CostKind); ScalarCost = EvaluateScalarCost([&]() { IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF); return TTI->getIntrinsicInstrCost(ICA, CostKind); diff --git a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll --- a/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll +++ b/llvm/test/Analysis/CostModel/AArch64/reduce-minmax.ll @@ -162,10 +162,10 @@ define void @reduce_fmin16() { ; CHECK-NOF16-LABEL: 'reduce_fmin16' -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmin.v2f16(<2 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmin.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmin.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmin.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fminimum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fminimum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fminimum.v8f16(<8 x half> undef) @@ -196,10 +196,10 @@ define void @reduce_fmax16() { ; CHECK-NOF16-LABEL: 'reduce_fmax16' -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) -; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V2f16 = call half @llvm.vector.reduce.fmax.v2f16(<2 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V4f16 = call half @llvm.vector.reduce.fmax.v4f16(<4 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %V8f16 = call half @llvm.vector.reduce.fmax.v8f16(<8 x half> undef) +; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V16f16 = call half @llvm.vector.reduce.fmax.v16f16(<16 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2f16m = call half @llvm.vector.reduce.fmaximum.v2f16(<2 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4f16m = call half @llvm.vector.reduce.fmaximum.v4f16(<4 x half> undef) ; CHECK-NOF16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8f16m = call half @llvm.vector.reduce.fmaximum.v8f16(<8 x half> undef) diff --git a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll --- a/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll +++ b/llvm/test/Analysis/CostModel/ARM/intrinsic-cost-kinds.ll @@ -294,19 +294,19 @@ define void @reduce_fmax(<16 x float> %va) { ; THRU-LABEL: 'reduce_fmax' -; THRU-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; THRU-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; LATE-LABEL: 'reduce_fmax' -; LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'reduce_fmax' -; SIZE-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE_LATE-LABEL: 'reduce_fmax' -; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) +; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va) ; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)