diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h @@ -247,6 +247,10 @@ Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I = nullptr); + int getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind); + int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind); diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/PatternMatch.h" @@ -1409,6 +1410,29 @@ return ScalarCost; } +int ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, + bool IsPairwiseForm, + TTI::TargetCostKind CostKind) { + EVT ValVT = TLI->getValueType(DL, ValTy); + int ISD = TLI->InstructionOpcodeToISD(Opcode); + if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD) + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); + + std::pair LT = TLI->getTypeLegalizationCost(DL, ValTy); + + static const CostTblEntry CostTblAdd[]{ + {ISD::ADD, MVT::v16i8, 1}, + {ISD::ADD, MVT::v8i16, 1}, + {ISD::ADD, MVT::v4i32, 1}, + }; + if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second)) + return Entry->Cost * ST->getMVEVectorCostFactor() * LT.first; + + return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, + CostKind); +} + int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { // Currently we make a somewhat optimistic assumption that active_lane_mask's diff --git a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll --- a/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll +++ b/llvm/test/Analysis/CostModel/ARM/mve-vecreduce-add.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: 'add_i8' ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0 = call i8 @llvm.vector.reduce.add.v1i8(<1 x i8> undef) @@ -34,22 +34,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a1sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i16> @@ -106,17 +106,17 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a1sa = sext <2 x i8> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a1sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5z = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a5za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5sa = sext <1 x i16> undef to <1 x i32> @@ -126,22 +126,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a6sa = sext <2 x i16> undef to <2 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6s = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a6sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9za) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sa) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i32> @@ -396,11 +396,11 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a1m = mul <2 x i8> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a1 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> %a1m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2m = mul <4 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> %a2m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3m = mul <8 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a3m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4m = mul <16 x i8> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 4120 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a4 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a4m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0m = mul <1 x i8> undef, undef @@ -442,37 +442,37 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2za = zext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zb = zext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i16> %a2za, %a2zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sa = sext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sb = sext <4 x i8> undef to <4 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i16> %a2sa, %a2sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a2sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3za = zext <8 x i8> undef to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zb = zext <8 x i8> undef to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3zm = mul <8 x i16> %a3za, %a3zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sa = sext <8 x i8> undef to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sb = sext <8 x i8> undef to <8 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3sm = mul <8 x i16> %a3sa, %a3sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a3s = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a3sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4za = zext <16 x i8> undef to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4zb = zext <16 x i8> undef to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4zm = mul <16 x i16> %a4za, %a4zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sa = sext <16 x i8> undef to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a4sb = sext <16 x i8> undef to <16 x i16> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4sm = mul <16 x i16> %a4sa, %a4sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a4s = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a4sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5m = mul <1 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a5 = call i16 @llvm.vector.reduce.add.v1i16(<1 x i16> %a5m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a6m = mul <2 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a6 = call i16 @llvm.vector.reduce.add.v2i16(<2 x i16> %a6m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7m = mul <4 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7 = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a7m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8m = mul <8 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 782 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a8 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a8m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9m = mul <16 x i16> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 1168 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a9 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %a9m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i16> @@ -564,27 +564,27 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2za = zext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2zb = zext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2zm = mul <4 x i32> %a2za, %a2zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sa = sext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2sb = sext <4 x i8> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2sm = mul <4 x i32> %a2sa, %a2sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a2sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3za = zext <8 x i8> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3zb = zext <8 x i8> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3zm = mul <8 x i32> %a3za, %a3zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sa = sext <8 x i8> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %a3sb = sext <8 x i8> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3sm = mul <8 x i32> %a3sa, %a3sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a3s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a3sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4za = zext <16 x i8> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4zb = zext <16 x i8> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4zm = mul <16 x i32> %a4za, %a4zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sa = sext <16 x i8> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %a4sb = sext <16 x i8> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4sm = mul <16 x i32> %a4sa, %a4sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a4s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a4sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5za = zext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zb = zext <1 x i16> undef to <1 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a5zm = mul <1 x i32> %a5za, %a5zb @@ -604,37 +604,37 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7za = zext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zb = zext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7zm = mul <4 x i32> %a7za, %a7zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sa = sext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sb = sext <4 x i16> undef to <4 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7sm = mul <4 x i32> %a7sa, %a7sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a7s = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a7sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8za = zext <8 x i16> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8zb = zext <8 x i16> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8zm = mul <8 x i32> %a8za, %a8zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sa = sext <8 x i16> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %a8sb = sext <8 x i16> undef to <8 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8sm = mul <8 x i32> %a8sa, %a8sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a8s = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a8sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9za = zext <16 x i16> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9zb = zext <16 x i16> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9zm = mul <16 x i32> %a9za, %a9zb -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9zm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sa = sext <16 x i16> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %a9sb = sext <16 x i16> undef to <16 x i32> ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9sm = mul <16 x i32> %a9sa, %a9sb -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a9s = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a9sm) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a10m = mul <1 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a10 = call i32 @llvm.vector.reduce.add.v1i32(<1 x i32> %a10m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %a11m = mul <2 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %a11 = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a11m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12m = mul <4 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a12 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a12m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13m = mul <8 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a13 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %a13m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14m = mul <16 x i32> undef, undef -; CHECK-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %a14 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %a14m) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %a0za = zext <1 x i8> undef to <1 x i32>