diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3779,15 +3779,19 @@ const APInt &DemandedElts, bool Insert, bool Extract) { + assert(DemandedElts.getBitWidth() == + cast(Ty)->getNumElements() && + "Vector size mismatch"); + + std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); + MVT MScalarTy = LT.second.getScalarType(); + unsigned SizeInBits = LT.second.getSizeInBits(); + InstructionCost Cost = 0; // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. if (Insert) { - std::pair LT = TLI->getTypeLegalizationCost(DL, Ty); - MVT MScalarTy = LT.second.getScalarType(); - unsigned SizeInBits = LT.second.getSizeInBits(); - if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || (MScalarTy.isInteger() && ST->hasSSE41()) || (MScalarTy == MVT::f32 && ST->hasSSE41())) { @@ -3865,8 +3869,47 @@ return MOVMSKCost; } - // TODO: Use default extraction for now, but we should investigate extending - // this to handle repeated subvector extraction. + if (LT.second.isVector()) { + int CostValue = *LT.first.getValue(); + assert(CostValue >= 0 && "Negative cost!"); + + unsigned NumElts = LT.second.getVectorNumElements() * CostValue; + assert(NumElts >= DemandedElts.getBitWidth() && + "Vector has been legalized to smaller element count"); + + // If we're extracting elements from a 128-bit subvector lane, we only need + // to extract each lane once, not for every element. + if (SizeInBits > 128) { + assert((SizeInBits % 128) == 0 && "Illegal vector"); + unsigned NumLegal128Lanes = SizeInBits / 128; + unsigned Num128Lanes = NumLegal128Lanes * CostValue; + APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); + unsigned Scale = NumElts / Num128Lanes; + + // Add cost for each demanded 128-bit subvector extraction. + // Luckily this is a lot easier than for insertion. + APInt DemandedUpper128Lanes = + APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); + + // Bottom 128-bit subvector extractions are free. + for (unsigned I = 0; I != Num128Lanes; I += NumLegal128Lanes) + DemandedUpper128Lanes.clearBit(I); + + Cost += DemandedUpper128Lanes.countPopulation(); + + // Add all the demanded element extractions together, but adjust the + // index to use the equivalent of the bottom 128 bit lane. + for (unsigned I = 0; I != NumElts; ++I) + if (WidenedDemandedElts[I]) { + unsigned Idx = I % Scale; + Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx); + } + + return Cost; + } + } + + // Fallback to default extraction. Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); } diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -663,23 +663,23 @@ ; AVX-LABEL: 'frem' ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512-LABEL: 'frem' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F64 = frem <2 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4F64 = frem <4 x double> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8F64 = frem <8 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F64 = frem <4 x double> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8F64 = frem <8 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; SLM-LABEL: 'frem' diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll --- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll @@ -266,8 +266,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -276,8 +276,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -286,8 +286,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -296,8 +296,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -306,8 +306,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -316,8 +316,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'casts' @@ -369,8 +369,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -379,8 +379,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -389,8 +389,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -399,8 +399,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -409,8 +409,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -419,8 +419,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'casts' @@ -472,8 +472,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -482,8 +482,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -492,8 +492,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -502,8 +502,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -512,8 +512,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -522,8 +522,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'casts' diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -28,15 +28,15 @@ ; AVX-LABEL: 'fptosi_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_double_i64' @@ -216,17 +216,17 @@ ; AVX-LABEL: 'fptosi_float_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -28,22 +28,22 @@ ; AVX1-LABEL: 'fptoui_double_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_double_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui double undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_double_i64' @@ -223,25 +223,25 @@ ; AVX1-LABEL: 'fptoui_float_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_float_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui float undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll @@ -42,19 +42,19 @@ ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 28 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 58 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 116 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 108 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 62 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 124 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 248 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 112 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 224 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll @@ -57,7 +57,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll @@ -57,7 +57,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 16 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 15 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll @@ -49,19 +49,19 @@ ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 28 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 114 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 60 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 122 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 244 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 110 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 220 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 41 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 58 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 86 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 31 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 58 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 129 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 558 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 82 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll @@ -22,11 +22,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 215 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 430 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 43 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 55 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 106 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 229 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 465 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 930 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -22,11 +22,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 31 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 58 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 516 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1116 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll @@ -22,11 +22,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 301 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 602 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 39 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 156 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 322 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 651 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1302 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll @@ -22,11 +22,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 41 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 82 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 164 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 688 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -38,12 +38,12 @@ ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 41 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 89 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 178 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1488 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 47 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 94 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 188 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 69 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 248 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 115 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 230 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 161 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 322 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 130 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 91 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 182 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 362 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 249 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 543 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 162 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 332 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 195 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 415 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 83 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 107 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 445 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 905 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 59 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 243 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 498 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1086 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 73 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 581 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 73 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 157 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 308 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 626 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1267 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 162 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 324 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 664 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -39,11 +39,11 @@ ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 81 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 177 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 354 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1448 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store float %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store float %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: store float %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll @@ -22,8 +22,8 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store float %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 ; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 16 for VF 4 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store double %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 8 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 16 For instruction: store double %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 32 For instruction: store double %v1, ptr %out1, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 ; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store double %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store double %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store double %v2, ptr %out2, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 32 for VF 4 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store double %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 8 For instruction: store double %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 16 For instruction: store double %v3, ptr %out3, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, ptr %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-5.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 ; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 ; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: store double %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 98 for VF 8 For instruction: store double %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store double %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store double %v4, ptr %out4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll @@ -20,8 +20,8 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 ; AVX1: LV: Found an estimated cost of 21 for VF 2 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store double %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store double %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store double %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store double %v5, ptr %out5, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-7.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 59 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 ; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 59 for VF 4 For instruction: store double %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 118 for VF 8 For instruction: store double %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 52 for VF 4 For instruction: store double %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 104 for VF 8 For instruction: store double %v6, ptr %out6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 ; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store double %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store double %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store double %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store double %v7, ptr %out7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 86 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 372 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 129 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 558 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 172 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 215 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 430 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 215 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 430 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 465 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 930 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 178 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 258 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 516 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 1116 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 301 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 602 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX2: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 301 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 602 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 ; AVX512DQ: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 298 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 651 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1302 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 249 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX1: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 344 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 688 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 344 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 688 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 69 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 ; AVX512DQ: LV: Found an estimated cost of 138 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 340 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 744 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1488 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 46 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 ; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 69 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 ; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 92 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 115 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 230 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 ; AVX2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 115 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 230 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll @@ -22,8 +22,8 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 ; AVX1: LV: Found an estimated cost of 57 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 138 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 276 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 161 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 322 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 ; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 161 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 322 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX1: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 184 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 368 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 ; AVX2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 184 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 368 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 ; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 26 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 39 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 ; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 ; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 130 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 130 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll @@ -20,8 +20,8 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 78 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 156 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 ; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 91 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 182 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 ; AVX2: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 91 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 182 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 ; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 104 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 208 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 ; AVX2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 104 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 208 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 166 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 362 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 101 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 249 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 543 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 67 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 134 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 332 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 724 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX1: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 415 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX2: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 415 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 167 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 413 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 905 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 338 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX1: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX1: LV: Found an estimated cost of 201 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 498 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 1086 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX1: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX1: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 581 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX2: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX2: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 581 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX512DQ: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 578 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 1267 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 473 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX1: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX1: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 664 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX2: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX2: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 664 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX512DQ: LV: Found an estimated cost of 133 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX512DQ: LV: Found an estimated cost of 266 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 660 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1448 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll @@ -50,7 +50,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4 -; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4 +; AVX512: LV: Found an estimated cost of 21 for VF 4 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: %valB.loaded = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll @@ -50,7 +50,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 24 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -24,12 +24,12 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -24,12 +24,12 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 @@ -81,11 +81,11 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 11 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 4 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 4 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 21 for VF 8 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 50 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 43 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test2' @@ -148,14 +148,14 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 +; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -80,12 +80,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -107,12 +107,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -249,12 +249,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -276,12 +276,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> undef, <8 x i32>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> undef, <4 x i32>* undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 168 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -402,57 +402,57 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_gather' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_gather' -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_gather' @@ -472,41 +472,41 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_gather' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_gather' @@ -526,14 +526,14 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) @@ -625,57 +625,57 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_scatter' -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_scatter' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_scatter' @@ -695,14 +695,14 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) @@ -936,109 +936,109 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_compressstore' -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_compressstore' -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1333,11 +1333,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1345,7 +1345,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32' @@ -1366,11 +1366,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' @@ -1378,7 +1378,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' @@ -1405,13 +1405,13 @@ ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1449,13 +1449,13 @@ ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1493,13 +1493,13 @@ ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1543,7 +1543,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1551,7 +1551,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -1602,7 +1602,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1610,7 +1610,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -1618,7 +1618,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> poison, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -1648,7 +1648,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -1669,11 +1669,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKX-LABEL: 'test_scatter_4i32' @@ -1700,13 +1700,13 @@ ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' @@ -1718,7 +1718,7 @@ ; KNL-LABEL: 'test_gather_4f32' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32' @@ -1750,13 +1750,13 @@ ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' @@ -1768,7 +1768,7 @@ ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -176,12 +176,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 292 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -235,12 +235,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 308 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 146 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -537,12 +537,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -596,12 +596,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0v3i32(<3 x i32> undef, <3 x i32>* undef, i32 1, <3 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0v2i32(<2 x i32> undef, <2 x i32>* undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v1i32.p0v1i32(<1 x i32> undef, <1 x i32>* undef, i32 1, <1 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 168 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 72 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.store.v32i16.p0v32i16(<32 x i16> undef, <32 x i16>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v16i16.p0v16i16(<16 x i16> undef, <16 x i16>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i16.p0v8i16(<8 x i16> undef, <8 x i16>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v4i16.p0v4i16(<4 x i16> undef, <4 x i16>* undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 352 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: call void @llvm.masked.store.v64i8.p0v64i8(<64 x i8> undef, <64 x i8>* undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: call void @llvm.masked.store.v32i8.p0v32i8(<32 x i8> undef, <32 x i8>* undef, i32 1, <32 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.store.v16i8.p0v16i8(<16 x i8> undef, <16 x i8>* undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v8i8.p0v8i8(<8 x i8> undef, <8 x i8>* undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -786,57 +786,57 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_gather' -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_gather' -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_gather' @@ -856,41 +856,41 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_gather' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_gather' @@ -910,14 +910,14 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> undef, i32 1, <8 x i1> undef, <8 x double> undef) @@ -1009,57 +1009,57 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX-LABEL: 'masked_scatter' -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 256 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 210 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 105 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; KNL-LABEL: 'masked_scatter' ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> undef, <4 x double*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> undef, <2 x double*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1f64.v1p0f64(<1 x double> undef, <1 x double*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> undef, <16 x float*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> undef, <8 x float*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4f32.v4p0f32(<4 x float> undef, <4 x float*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> undef, <2 x float*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i64.v8p0i64(<8 x i64> undef, <8 x i64*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0i64(<4 x i64> undef, <4 x i64*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> undef, <2 x i64*> undef, i32 1, <2 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v1i64.v1p0i64(<1 x i64> undef, <1 x i64*> undef, i32 1, <1 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> undef, <16 x i32*> undef, i32 1, <16 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKX-LABEL: 'masked_scatter' @@ -1079,14 +1079,14 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> undef, <8 x i32*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> undef, <4 x i32*> undef, i32 1, <4 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> undef, <2 x i32*> undef, i32 1, <2 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: call void @llvm.masked.scatter.v32i16.v32p0i16(<32 x i16> undef, <32 x i16*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.scatter.v16i16.v16p0i16(<16 x i16> undef, <16 x i16*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i16.v8p0i16(<8 x i16> undef, <8 x i16*> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16> undef, <4 x i16*> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: call void @llvm.masked.scatter.v64i8.v64p0i8(<64 x i8> undef, <64 x i8*> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: call void @llvm.masked.scatter.v32i8.v32p0i8(<32 x i8> undef, <32 x i8*> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.scatter.v16i8.v16p0i8(<16 x i8> undef, <16 x i8*> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.scatter.v8i8.v8p0i8(<8 x i8> undef, <8 x i8*> undef, i32 1, <8 x i1> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; call void @llvm.masked.scatter.v8f64.v8p0f64(<8 x double> undef, <8 x double*> undef, i32 1, <8 x i1> undef) @@ -1320,109 +1320,109 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX1-LABEL: 'masked_compressstore' -; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX2-LABEL: 'masked_compressstore' -; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; SKL-LABEL: 'masked_compressstore' -; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 ; ; AVX512-LABEL: 'masked_compressstore' -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, double* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, double* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, double* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, double* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, float* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, float* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, float* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, float* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, i64* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, i64* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, i64* undef, <2 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, i64* undef, <1 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, i32* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, i32* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, i32* undef, <4 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, i32* undef, <2 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 120 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, i16* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, i16* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, i16* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, i16* undef, <4 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 240 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 112 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, i8* undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, i8* undef, <32 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, i8* undef, <16 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, i8* undef, <8 x i1> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1717,11 +1717,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32' @@ -1729,7 +1729,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32' @@ -1750,11 +1750,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX1-LABEL: 'test_gather_4i32_const_mask' -; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; AVX2-LABEL: 'test_gather_4i32_const_mask' -; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKL-LABEL: 'test_gather_4i32_const_mask' @@ -1762,7 +1762,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; KNL-LABEL: 'test_gather_4i32_const_mask' -; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %res ; ; SKX-LABEL: 'test_gather_4i32_const_mask' @@ -1789,13 +1789,13 @@ ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1833,13 +1833,13 @@ ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1877,13 +1877,13 @@ ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1927,7 +1927,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1935,7 +1935,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' @@ -1986,7 +1986,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_scatter_16i32' @@ -1994,7 +1994,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKL-LABEL: 'test_scatter_16i32' @@ -2002,7 +2002,7 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind ; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> -; SKL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_16i32' @@ -2032,7 +2032,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_8i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_scatter_8i32' @@ -2053,11 +2053,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_scatter_4i32' -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; KNL-LABEL: 'test_scatter_4i32' -; KNL-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SKX-LABEL: 'test_scatter_4i32' @@ -2084,13 +2084,13 @@ ; AVX1-LABEL: 'test_gather_4f32' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32' @@ -2102,7 +2102,7 @@ ; KNL-LABEL: 'test_gather_4f32' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32' @@ -2134,13 +2134,13 @@ ; AVX1-LABEL: 'test_gather_4f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; AVX2-LABEL: 'test_gather_4f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKL-LABEL: 'test_gather_4f32_const_mask' @@ -2152,7 +2152,7 @@ ; KNL-LABEL: 'test_gather_4f32_const_mask' ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind -; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> , <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %res ; ; SKX-LABEL: 'test_gather_4f32_const_mask' diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll @@ -34,22 +34,22 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 17 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 34 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX512: LV: Found an estimated cost of 10 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll @@ -33,23 +33,23 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 5 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 9 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 18 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 36 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 5 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX512: LV: Found an estimated cost of 12 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX512: LV: Found an estimated cost of 11 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll @@ -27,16 +27,16 @@ ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 33 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll @@ -35,7 +35,7 @@ ; AVX1: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 40 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 @@ -43,7 +43,7 @@ ; AVX2: LV: Found an estimated cost of 4 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 8 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 16 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 40 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 32 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll --- a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll @@ -45,25 +45,25 @@ ; AVX1-LABEL: 'reduce_f64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fadd.v2f64(double %arg, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fadd.v4f64(double %arg, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call double @llvm.vector.reduce.fadd.v8f64(double %arg, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call double @llvm.vector.reduce.fadd.v16f64(double %arg, <16 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fadd.v1f64(double %arg, <1 x double> undef) @@ -115,27 +115,27 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call float @llvm.vector.reduce.fadd.v32f32(float %arg, <32 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll --- a/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll +++ b/llvm/test/Analysis/CostModel/X86/reduce-fmul.ll @@ -45,25 +45,25 @@ ; AVX1-LABEL: 'reduce_f64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f64' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call double @llvm.vector.reduce.fmul.v2f64(double %arg, <2 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call double @llvm.vector.reduce.fmul.v4f64(double %arg, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call double @llvm.vector.reduce.fmul.v8f64(double %arg, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call double @llvm.vector.reduce.fmul.v16f64(double %arg, <16 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call double @llvm.vector.reduce.fmul.v1f64(double %arg, <1 x double> undef) @@ -115,27 +115,27 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'reduce_f32' ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'reduce_f32' ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fmul.v2f32(float %arg, <2 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4 = call float @llvm.vector.reduce.fmul.v4f32(float %arg, <4 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8 = call float @llvm.vector.reduce.fmul.v8f32(float %arg, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16 = call float @llvm.vector.reduce.fmul.v16f32(float %arg, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V32 = call float @llvm.vector.reduce.fmul.v32f32(float %arg, <32 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V1 = call float @llvm.vector.reduce.fmul.v1f32(float %arg, <1 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll @@ -33,27 +33,27 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 213 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX1: LV: Found an estimated cost of 426 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 53 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX2: LV: Found an estimated cost of 106 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 -; AVX512: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: store i16 %valB, i16* %out, align 2 +; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: store i16 %valB, i16* %out, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll @@ -33,23 +33,23 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 107 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 214 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX1: LV: Found an estimated cost of 428 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 32 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 27 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 54 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 +; AVX2: LV: Found an estimated cost of 108 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 18 for VF 16 For instruction: store i32 %valB, i32* %out, align 4 ; AVX512: LV: Found an estimated cost of 36 for VF 32 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll @@ -33,23 +33,23 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 16 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 32 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 64 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 +; AVX2: LV: Found an estimated cost of 112 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 -; AVX512: LV: Found an estimated cost of 16 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 +; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %valB, i64* %out, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll @@ -33,27 +33,27 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 108 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX1: LV: Found an estimated cost of 448 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 106 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 212 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX1: LV: Found an estimated cost of 425 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX2: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 14 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 28 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 56 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX2: LV: Found an estimated cost of 128 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 26 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 52 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX2: LV: Found an estimated cost of 105 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 30 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 60 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 -; AVX512: LV: Found an estimated cost of 288 for VF 64 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: store i8 %valB, i8* %out, align 1 +; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: store i8 %valB, i8* %out, align 1 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll @@ -67,9 +67,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride2' @@ -178,9 +178,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 312 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride3' @@ -196,11 +196,11 @@ ; AVX512FVEC256-LABEL: 'replication_i16_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride3' @@ -289,9 +289,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 384 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride4' @@ -400,9 +400,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride5' @@ -417,12 +417,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 223 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 446 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 892 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 752 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride5' @@ -511,9 +511,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride6' @@ -528,12 +528,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 267 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 534 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1068 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 450 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 900 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride6' @@ -622,9 +622,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride7' @@ -639,12 +639,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 151 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 311 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 622 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1244 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1048 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride7' @@ -733,9 +733,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll @@ -53,9 +53,9 @@ ; AVX-LABEL: 'replication_i32_stride2' ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride2' @@ -126,9 +126,9 @@ ; AVX-LABEL: 'replication_i32_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride3' @@ -199,9 +199,9 @@ ; AVX-LABEL: 'replication_i32_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride4' @@ -272,9 +272,9 @@ ; AVX-LABEL: 'replication_i32_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 248 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride5' @@ -345,9 +345,9 @@ ; AVX-LABEL: 'replication_i32_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride6' @@ -418,9 +418,9 @@ ; AVX-LABEL: 'replication_i32_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride7' @@ -491,9 +491,9 @@ ; AVX-LABEL: 'replication_i32_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -47,9 +47,9 @@ ; ; AVX-LABEL: 'replication_i64_stride2' ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride2' @@ -111,9 +111,9 @@ ; ; AVX-LABEL: 'replication_i64_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride3' @@ -175,9 +175,9 @@ ; ; AVX-LABEL: 'replication_i64_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride4' @@ -239,9 +239,9 @@ ; ; AVX-LABEL: 'replication_i64_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride5' @@ -303,9 +303,9 @@ ; ; AVX-LABEL: 'replication_i64_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride6' @@ -367,9 +367,9 @@ ; ; AVX-LABEL: 'replication_i64_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride7' @@ -431,9 +431,9 @@ ; ; AVX-LABEL: 'replication_i64_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -67,9 +67,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride2' @@ -178,9 +178,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 300 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 600 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride3' @@ -196,11 +196,11 @@ ; AVX512FVEC256-LABEL: 'replication_i8_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 526 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1052 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' @@ -289,9 +289,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 338 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 676 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride4' @@ -400,9 +400,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride5' @@ -417,12 +417,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 435 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 870 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1740 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 720 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1440 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' @@ -511,9 +511,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 474 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 948 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride6' @@ -528,12 +528,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 521 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1042 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 431 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 862 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1724 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' @@ -622,9 +622,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1144 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 542 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride7' @@ -639,12 +639,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 295 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 607 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1214 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2428 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 502 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1004 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' @@ -733,9 +733,9 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 305 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 610 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1220 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -157,15 +157,15 @@ ; AVX-LABEL: 'sitofp_i64_double' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'sitofp_i64_double' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f64 = sitofp i64 undef to double ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %cvt_v4i64_v4f64 = sitofp <4 x i64> undef to <4 x double> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f64 = sitofp <8 x i64> undef to <8 x double> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_double' @@ -358,8 +358,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_float' diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -315,29 +315,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -347,29 +347,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -382,29 +382,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -414,29 +414,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -516,29 +516,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -548,29 +548,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -650,29 +650,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -682,29 +682,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -784,29 +784,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -816,29 +816,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -851,29 +851,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -883,29 +883,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 294 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 588 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1008 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1176 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1295,29 +1295,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1328,29 +1328,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1361,29 +1361,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1397,29 +1397,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1430,29 +1430,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1463,29 +1463,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1601,29 +1601,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1634,29 +1634,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1667,29 +1667,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1805,29 +1805,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1838,29 +1838,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1871,29 +1871,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2009,29 +2009,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2042,29 +2042,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2075,29 +2075,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2111,29 +2111,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2144,29 +2144,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2177,29 +2177,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 197 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 246 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 279 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 492 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 574 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 984 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1148 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1968 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2296 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2751,28 +2751,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2786,28 +2786,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2821,28 +2821,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -2894,28 +2894,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2929,28 +2929,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2964,28 +2964,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -3895,28 +3895,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -3930,28 +3930,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -3965,28 +3965,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1>