diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4392,10 +4392,18 @@ std::pair LT = getTypeLegalizationCost(Ty); MVT MScalarTy = LT.second.getScalarType(); - unsigned SizeInBits = LT.second.getSizeInBits(); + unsigned LegalVectorBitWidth = LT.second.getSizeInBits(); TTI::TargetCostKind CostKind = TTI::TargetCostKind::TCK_RecipThroughput; InstructionCost Cost = 0; + constexpr unsigned LaneBitWidth = 128; + assert((LegalVectorBitWidth < LaneBitWidth || + (LegalVectorBitWidth % LaneBitWidth) == 0) && + "Illegal vector"); + + const int NumLegalVectors = *LT.first.getValue(); + assert(NumLegalVectors >= 0 && "Negative cost!"); + // For insertions, a ISD::BUILD_VECTOR style vector initialization can be much // cheaper than an accumulation of ISD::INSERT_VECTOR_ELT. if (Insert) { @@ -4404,7 +4412,7 @@ (MScalarTy == MVT::f32 && ST->hasSSE41())) { // For types we can insert directly, insertion into 128-bit sub vectors is // cheap, followed by a cheap chain of concatenations. - if (SizeInBits <= 128) { + if (LegalVectorBitWidth <= LaneBitWidth) { Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false); } else { @@ -4420,32 +4428,51 @@ // Case#2: inserting into 5th index needs extracti128 + vpinsrd + // inserti128. // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128. - const int CostValue = *LT.first.getValue(); - assert(CostValue >= 0 && "Negative cost!"); - unsigned Num128Lanes = SizeInBits / 128 * CostValue; - unsigned NumElts = LT.second.getVectorNumElements() * CostValue; - APInt WidenedDemandedElts = DemandedElts.zext(NumElts); - unsigned Scale = NumElts / Num128Lanes; - // We iterate each 128-lane, and check if we need a - // extracti128/inserti128 for this 128-lane. - for (unsigned I = 0; I < NumElts; I += Scale) { - APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale); - APInt MaskedDE = Mask & WidenedDemandedElts; - unsigned Population = MaskedDE.countPopulation(); - Cost += (Population > 0 && Population != Scale && - I % LT.second.getVectorNumElements() != 0); - Cost += Population > 0; + assert((LegalVectorBitWidth % LaneBitWidth) == 0 && "Illegal vector"); + unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth; + unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors; + unsigned NumLegalElts = + LT.second.getVectorNumElements() * NumLegalVectors; + assert(NumLegalElts >= DemandedElts.getBitWidth() && + "Vector has been legalized to smaller element count"); + assert((NumLegalElts % NumLanesTotal) == 0 && + "Unexpected elts per lane"); + unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal; + + APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts); + auto *LaneTy = + FixedVectorType::get(Ty->getElementType(), NumEltsPerLane); + + for (unsigned I = 0; I != NumLanesTotal; ++I) { + APInt LaneEltMask = WidenedDemandedElts.extractBits( + NumEltsPerLane, NumEltsPerLane * I); + if (LaneEltMask.isNullValue()) + continue; + // FIXME: we don't need to extract if all non-demanded elements + // are legalization-inserted padding. + if (!LaneEltMask.isAllOnes()) + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, + I * NumEltsPerLane, LaneTy); + Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, Insert, + false); } - Cost += DemandedElts.countPopulation(); - // For vXf32 cases, insertion into the 0'th index in each v4f32 - // 128-bit vector is free. - // NOTE: This assumes legalization widens vXf32 vectors. - if (MScalarTy == MVT::f32) - for (unsigned i = 0, e = cast(Ty)->getNumElements(); - i < e; i += 4) - if (DemandedElts[i]) - Cost--; + APInt AffectedLanes = + APIntOps::ScaleBitMask(WidenedDemandedElts, NumLanesTotal); + APInt FullyAffectedLegalVectors = APIntOps::ScaleBitMask( + AffectedLanes, NumLegalVectors, /*MatchAllBits=*/true); + for (int LegalVec = 0; LegalVec != NumLegalVectors; ++LegalVec) { + for (unsigned Lane = 0; Lane != NumLegalLanes; ++Lane) { + unsigned I = NumLegalLanes * LegalVec + Lane; + // No need to insert unaffected lane; or lane 0 of each legal vector + // iff ALL lanes of that vector were affected and will be inserted. + if (!AffectedLanes[I] || + (Lane == 0 && FullyAffectedLegalVectors[LegalVec])) + continue; + Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, None, CostKind, + I * NumEltsPerLane, LaneTy); + } + } } } else if (LT.second.isVector()) { // Without fast insertion, we need to use MOVD/MOVQ to pass each demanded @@ -4477,39 +4504,36 @@ } if (LT.second.isVector()) { - int CostValue = *LT.first.getValue(); - assert(CostValue >= 0 && "Negative cost!"); - - unsigned NumElts = LT.second.getVectorNumElements() * CostValue; - assert(NumElts >= DemandedElts.getBitWidth() && + unsigned NumLegalElts = + LT.second.getVectorNumElements() * NumLegalVectors; + assert(NumLegalElts >= DemandedElts.getBitWidth() && "Vector has been legalized to smaller element count"); - // If we're extracting elements from a 128-bit subvector lane, we only need - // to extract each lane once, not for every element. - if (SizeInBits > 128) { - assert((SizeInBits % 128) == 0 && "Illegal vector"); - unsigned NumLegal128Lanes = SizeInBits / 128; - unsigned Num128Lanes = NumLegal128Lanes * CostValue; - APInt WidenedDemandedElts = DemandedElts.zext(NumElts); - unsigned Scale = NumElts / Num128Lanes; + // If we're extracting elements from a 128-bit subvector lane, + // we only need to extract each lane once, not for every element. + if (LegalVectorBitWidth > LaneBitWidth) { + unsigned NumLegalLanes = LegalVectorBitWidth / LaneBitWidth; + unsigned NumLanesTotal = NumLegalLanes * NumLegalVectors; + assert((NumLegalElts % NumLanesTotal) == 0 && + "Unexpected elts per lane"); + unsigned NumEltsPerLane = NumLegalElts / NumLanesTotal; // Add cost for each demanded 128-bit subvector extraction. // Luckily this is a lot easier than for insertion. - APInt DemandedUpper128Lanes = - APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); - auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale); - for (unsigned I = 0; I != Num128Lanes; ++I) - if (DemandedUpper128Lanes[I]) - Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, - I * Scale, Ty128); - - // Add all the demanded element extractions together, but adjust the - // index to use the equivalent of the bottom 128 bit lane. - for (unsigned I = 0; I != NumElts; ++I) - if (WidenedDemandedElts[I]) { - unsigned Idx = I % Scale; - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx); - } + APInt WidenedDemandedElts = DemandedElts.zext(NumLegalElts); + auto *LaneTy = + FixedVectorType::get(Ty->getElementType(), NumEltsPerLane); + + for (unsigned I = 0; I != NumLanesTotal; ++I) { + APInt LaneEltMask = WidenedDemandedElts.extractBits( + NumEltsPerLane, I * NumEltsPerLane); + if (LaneEltMask.isNullValue()) + continue; + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, + I * NumEltsPerLane, LaneTy); + Cost += BaseT::getScalarizationOverhead(LaneTy, LaneEltMask, false, + Extract); + } return Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll @@ -584,8 +584,8 @@ ; AVX-LABEL: 'fma' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -958,8 +958,8 @@ ; AVX-LABEL: 'fma' ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -892,8 +892,8 @@ ; AVX-LABEL: 'fma' ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -660,8 +660,8 @@ ; AVX-LABEL: 'frem' ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef ; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef @@ -671,8 +671,8 @@ ; AVX512-LABEL: 'frem' ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = frem float undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = frem <4 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = frem <16 x float> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = frem double undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = frem <2 x double> undef, undef ; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = frem <4 x double> undef, undef @@ -1035,8 +1035,8 @@ ; AVX-LABEL: 'fma' ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.fma.f32(float undef, float undef, float undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F64 = call double @llvm.fma.f64(double undef, double undef, double undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef) diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll @@ -250,15 +250,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v4i64' @@ -299,15 +299,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i64' @@ -397,15 +397,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i32' @@ -446,15 +446,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i32' @@ -532,15 +532,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i16' @@ -581,15 +581,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v32i16' diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll @@ -250,15 +250,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v4i64' @@ -299,15 +299,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i64' @@ -397,15 +397,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i32' @@ -446,15 +446,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i32' @@ -532,15 +532,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i16' @@ -581,15 +581,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v32i16' diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll @@ -250,15 +250,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v4i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v4i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v4i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v4i64' @@ -299,15 +299,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i64' -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i64' -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i64' -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i64' @@ -397,15 +397,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v8i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v8i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v8i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v8i32' @@ -446,15 +446,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i32' -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i32> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i32' @@ -532,15 +532,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v16i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v16i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v16i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v16i16' @@ -581,15 +581,15 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX1-LABEL: 'var_bitreverse_v32i16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX2-LABEL: 'var_bitreverse_v32i16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; AVX512-LABEL: 'var_bitreverse_v32i16' -; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <32 x i16> %bitreverse ; ; XOP-LABEL: 'var_bitreverse_v32i16' diff --git a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll --- a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll @@ -15,8 +15,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = call float @llvm.maxnum.f32(float undef, float undef) diff --git a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll --- a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll @@ -15,8 +15,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %F32 = call float @llvm.minnum.f32(float undef, float undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F32 = call <2 x float> @llvm.minnum.v2f32(<2 x float> undef, <2 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F32 = call <4 x float> @llvm.minnum.v4f32(<4 x float> undef, <4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef ; %F32 = call float @llvm.minnum.f32(float undef, float undef) diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll --- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll @@ -266,8 +266,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -276,8 +276,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -286,8 +286,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -296,8 +296,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -306,8 +306,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -316,8 +316,8 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'casts' @@ -369,8 +369,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -379,8 +379,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -389,8 +389,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -399,8 +399,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -409,8 +409,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -419,8 +419,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'casts' @@ -472,8 +472,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -482,8 +482,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f64u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f64s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f32s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f32(<8 x float> undef) @@ -492,8 +492,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -502,8 +502,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f64s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %v16f32s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f32(<16 x float> undef) @@ -512,8 +512,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -522,8 +522,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v16f64u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f64s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16f64u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'casts' @@ -985,22 +985,22 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -1034,28 +1034,28 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512DQ-LABEL: 'fp16' @@ -1087,28 +1087,28 @@ ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 123 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SLM-LABEL: 'fp16' diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -28,15 +28,15 @@ ; AVX-LABEL: 'fptosi_double_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_double_i64' @@ -216,17 +216,17 @@ ; AVX-LABEL: 'fptosi_float_i64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptosi_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptosi float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptosi <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptosi_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -28,22 +28,22 @@ ; AVX1-LABEL: 'fptoui_double_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui double undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_double_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui double undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_double_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui double undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x double> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_double_i64' @@ -223,25 +223,25 @@ ; AVX1-LABEL: 'fptoui_float_i64' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = fptoui float undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'fptoui_float_i64' ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I64 = fptoui float undef to i64 ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512F-LABEL: 'fptoui_float_i64' ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = fptoui float undef to i64 ; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = fptoui <2 x float> undef to <2 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'fptoui_float_i64' diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll @@ -28,33 +28,33 @@ ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX1: LV: Found an estimated cost of 193 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 ; ; AVX2-SLOWGATHER-LABEL: 'test' ; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 33 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 ; ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-FASTGATHER: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 108 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 53 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i16, i16* %inB, align 2 ; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 112 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 224 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll @@ -34,17 +34,17 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 97 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 ; ; AVX2-SLOWGATHER-LABEL: 'test' ; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 17 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 ; ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll @@ -33,18 +33,18 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 400 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 ; ; AVX2-SLOWGATHER-LABEL: 'test' ; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 4 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 10 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 20 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 40 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 80 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 9 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 ; ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 @@ -57,7 +57,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 6 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 15 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll @@ -36,7 +36,7 @@ ; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX1: LV: Found an estimated cost of 385 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 ; ; AVX2-SLOWGATHER-LABEL: 'test' ; AVX2-SLOWGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 @@ -44,7 +44,7 @@ ; AVX2-SLOWGATHER: LV: Found an estimated cost of 8 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 16 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-SLOWGATHER: LV: Found an estimated cost of 32 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 65 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 ; ; AVX2-FASTGATHER-LABEL: 'test' ; AVX2-FASTGATHER: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 @@ -52,7 +52,7 @@ ; AVX2-FASTGATHER: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-FASTGATHER: LV: Found an estimated cost of 26 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX2-FASTGATHER: LV: Found an estimated cost of 52 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 105 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 @@ -60,8 +60,8 @@ ; AVX512: LV: Found an estimated cost of 13 for VF 4 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX512: LV: Found an estimated cost of 27 for VF 8 For instruction: %valB = load i8, i8* %inB, align 1 ; AVX512: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 110 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 220 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 ; entry: br label %for.body diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 21 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll @@ -22,8 +22,8 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 7 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 20 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 51 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 12 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 59 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 34 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 77 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 154 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 43 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 51 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 41 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 109 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 218 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll @@ -25,25 +25,25 @@ ; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2 ; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 3 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 11 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 248 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 118 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 236 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 20 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 6 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 ; AVX1: LV: Found an estimated cost of 12 for VF 4 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 52 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 104 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -23,9 +23,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll @@ -22,8 +22,8 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll @@ -22,15 +22,15 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4 ; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -21,9 +21,9 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -20,8 +20,8 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll @@ -20,14 +20,14 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 ; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 80 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 17 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 3 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 7 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 99 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 16 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 13 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 25 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 58 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 83 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 325 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 99 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 45 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 85 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 118 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 455 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 62 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 233 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll @@ -26,7 +26,7 @@ ; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -34,7 +34,7 @@ ; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 520 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -42,8 +42,8 @@ ; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1 ; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 4 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 ; AVX512DQ: LV: Found an estimated cost of 10 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, ptr %out1, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 15 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 ; AVX512DQ: LV: Found an estimated cost of 57 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, ptr %out2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 11 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: LV: Found an estimated cost of 34 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 ; AVX512DQ: LV: Found an estimated cost of 68 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, ptr %out3, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll @@ -22,28 +22,28 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 ; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 178 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, ptr %out4, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -22,11 +22,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 23 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 61 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 ; AVX512DQ: LV: Found an estimated cost of 96 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, ptr %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll @@ -22,28 +22,28 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 249 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v6, ptr %out6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll @@ -22,28 +22,28 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 69 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 138 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 284 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v7, ptr %out7, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll @@ -22,10 +22,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll @@ -20,17 +20,17 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -21,11 +21,11 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -20,10 +20,10 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, ptr %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll @@ -19,15 +19,15 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll @@ -19,9 +19,9 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll @@ -19,15 +19,15 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll @@ -19,15 +19,15 @@ ; ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 ; ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll @@ -25,8 +25,8 @@ ; AVX1: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 2 for VF 4 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX1: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 2 for VF 8 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 4 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 ; AVX512DQ: LV: Found an estimated cost of 5 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v1, ptr %out1, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 15 for VF 2 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 4 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 101 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 9 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 ; AVX512DQ: LV: Found an estimated cost of 15 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, ptr %out2, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll @@ -24,9 +24,9 @@ ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 67 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 134 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 5 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 9 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 ; AVX512DQ: LV: Found an estimated cost of 14 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, ptr %out3, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll @@ -23,27 +23,27 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX1: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX2: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 23 for VF 2 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 ; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 167 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 338 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v4, ptr %out4, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll @@ -23,10 +23,10 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 201 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 @@ -43,7 +43,7 @@ ; AVX512DQ: LV: Found an estimated cost of 19 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 29 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 ; AVX512DQ: LV: Found an estimated cost of 93 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, ptr %out5, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll @@ -23,27 +23,27 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 473 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v6, ptr %out6, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll @@ -23,27 +23,27 @@ ; AVX1-LABEL: 'test' ; AVX1: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX2-LABEL: 'test' ; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX2: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX512DQ-LABEL: 'test' ; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 ; AVX512DQ: LV: Found an estimated cost of 33 for VF 2 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 133 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 266 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 540 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 ; ; AVX512BW-LABEL: 'test' ; AVX512BW: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v7, ptr %out7, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll @@ -50,7 +50,7 @@ ; AVX512-LABEL: 'test' ; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8 -; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -28,8 +28,8 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 13 for VF 4 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 27 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 ; ; ENABLED_MASKED_STRIDED-LABEL: 'test1' ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -176,12 +176,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -235,12 +235,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -790,24 +790,24 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -817,24 +817,24 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -856,12 +856,12 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -876,19 +876,19 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -910,12 +910,12 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1182,24 +1182,24 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1209,24 +1209,24 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1789,13 +1789,13 @@ ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1833,13 +1833,13 @@ ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1877,13 +1877,13 @@ ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1927,7 +1927,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1935,7 +1935,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> poison, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -176,12 +176,12 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -235,12 +235,12 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0v3i32(<3 x i32>* undef, i32 1, <3 x i1> undef, <3 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0v2i32(<2 x i32>* undef, i32 1, <2 x i1> undef, <2 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0v1i32(<1 x i32>* undef, i32 1, <1 x i1> undef, <1 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0v8i16(<8 x i16>* undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0v4i16(<4 x i16>* undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0v16i8(<16 x i8>* undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0v8i8(<8 x i8>* undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -790,24 +790,24 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -817,24 +817,24 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> undef, i32 1, <4 x i1> undef, <4 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> undef, i32 1, <2 x i1> undef, <2 x double> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0f64(<1 x double*> undef, i32 1, <1 x i1> undef, <1 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -856,12 +856,12 @@ ; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -876,19 +876,19 @@ ; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> undef, i32 1, <4 x i1> undef, <4 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> undef, i32 1, <2 x i1> undef, <2 x float> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> undef, i32 1, <2 x i1> undef, <2 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0i64(<1 x i64*> undef, i32 1, <1 x i1> undef, <1 x i64> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -910,12 +910,12 @@ ; SKX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> undef, i32 1, <4 x i1> undef, <4 x i32> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> undef, i32 1, <2 x i1> undef, <2 x i32> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0i16(<8 x i16*> undef, i32 1, <8 x i1> undef, <8 x i16> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*> undef, i32 1, <4 x i1> undef, <4 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0i8(<16 x i8*> undef, i32 1, <16 x i1> undef, <16 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0i8(<8 x i8*> undef, i32 1, <8 x i1> undef, <8 x i8> undef) ; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1182,24 +1182,24 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1209,24 +1209,24 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(double* undef, <4 x i1> undef, <4 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(double* undef, <2 x i1> undef, <2 x double> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(double* undef, <1 x i1> undef, <1 x double> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(float* undef, <4 x i1> undef, <4 x float> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(float* undef, <2 x i1> undef, <2 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(i64* undef, <2 x i1> undef, <2 x i64> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(i64* undef, <1 x i1> undef, <1 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(i32* undef, <4 x i1> undef, <4 x i32> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(i32* undef, <2 x i1> undef, <2 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(i16* undef, <8 x i1> undef, <8 x i16> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(i16* undef, <4 x i1> undef, <4 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(i8* undef, <16 x i1> undef, <16 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(i8* undef, <8 x i1> undef, <8 x i8> undef) ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 0 @@ -1789,13 +1789,13 @@ ; AVX1-LABEL: 'test_gather_16f32_const_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask' @@ -1833,13 +1833,13 @@ ; AVX1-LABEL: 'test_gather_16f32_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_var_mask' @@ -1877,13 +1877,13 @@ ; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_ra_var_mask' @@ -1927,7 +1927,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; AVX2-LABEL: 'test_gather_16f32_const_mask2' @@ -1935,7 +1935,7 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x float> %res ; ; SKL-LABEL: 'test_gather_16f32_const_mask2' diff --git a/llvm/test/Analysis/CostModel/X86/powi.ll b/llvm/test/Analysis/CostModel/X86/powi.ll --- a/llvm/test/Analysis/CostModel/X86/powi.ll +++ b/llvm/test/Analysis/CostModel/X86/powi.ll @@ -35,8 +35,8 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 %arg) -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 %arg) ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 %arg) ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 %arg) @@ -48,8 +48,8 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.powi.f32.i32(float poison, i32 %arg) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.powi.v2f32.i32(<2 x float> poison, i32 %arg) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> poison, i32 %arg) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.powi.f64.i32(double poison, i32 %arg) ; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.powi.v2f64.i32(<2 x double> poison, i32 %arg) ; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.powi.v4f64.i32(<4 x double> poison, i32 %arg) diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll @@ -76,10 +76,10 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride2' @@ -87,10 +87,10 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride2' @@ -252,22 +252,22 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride3' ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <3 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <6 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <12 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride3' @@ -429,22 +429,22 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 552 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride4' ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <4 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride4' @@ -605,23 +605,23 @@ ; AVX1-LABEL: 'replication_i1_stride5' ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 668 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride5' ; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <5 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <10 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 664 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride5' @@ -782,23 +782,23 @@ ; AVX1-LABEL: 'replication_i1_stride6' ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride6' ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <6 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <12 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 398 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 796 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride6' @@ -959,23 +959,23 @@ ; AVX1-LABEL: 'replication_i1_stride7' ; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride7' ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <7 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <14 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 478 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 956 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride7' @@ -1136,23 +1136,23 @@ ; AVX1-LABEL: 'replication_i1_stride8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer ; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'replication_i1_stride8' ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %vf1 = shufflevector <1 x i1> undef, <1 x i1> poison, <8 x i32> zeroinitializer ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i1> undef, <2 x i1> poison, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 273 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 265 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i1_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll @@ -66,10 +66,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <2 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride2' @@ -176,11 +176,11 @@ ; AVX-LABEL: 'replication_i16_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride3' @@ -196,11 +196,11 @@ ; AVX512FVEC256-LABEL: 'replication_i16_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <3 x i32> zeroinitializer ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <6 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 444 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride3' @@ -287,11 +287,11 @@ ; AVX-LABEL: 'replication_i16_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <4 x i32> zeroinitializer ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride4' @@ -397,12 +397,12 @@ ; ; AVX-LABEL: 'replication_i16_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride5' @@ -417,12 +417,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <5 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 752 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 366 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 732 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride5' @@ -508,12 +508,12 @@ ; ; AVX-LABEL: 'replication_i16_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride6' @@ -528,12 +528,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <6 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 450 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 900 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 438 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 876 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride6' @@ -619,12 +619,12 @@ ; ; AVX-LABEL: 'replication_i16_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride7' @@ -639,12 +639,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i16_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <7 x i32> zeroinitializer -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1048 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1020 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BWVEC512-LABEL: 'replication_i16_stride7' @@ -730,12 +730,12 @@ ; ; AVX-LABEL: 'replication_i16_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf1 = shufflevector <1 x i16> undef, <1 x i16> poison, <8 x i32> zeroinitializer -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i16_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll @@ -52,10 +52,10 @@ ; ; AVX-LABEL: 'replication_i32_stride2' ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride2' @@ -124,11 +124,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride3' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride3' @@ -197,11 +197,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride4' -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride4' @@ -270,11 +270,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride5' -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride5' @@ -343,11 +343,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride6' -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride6' @@ -416,11 +416,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride7' -; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride7' @@ -489,11 +489,11 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i32_stride8' -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i32_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -46,10 +46,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride2' -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride2' @@ -110,10 +110,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride3' -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride3' @@ -174,10 +174,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride4' -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride4' @@ -238,10 +238,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride5' -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride5' @@ -302,10 +302,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride6' -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride6' @@ -366,10 +366,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride7' -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride7' @@ -430,10 +430,10 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'replication_i64_stride8' -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i64_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -66,10 +66,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride2' @@ -176,11 +176,11 @@ ; AVX-LABEL: 'replication_i8_stride3' ; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride3' @@ -196,11 +196,11 @@ ; AVX512FVEC256-LABEL: 'replication_i8_stride3' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <6 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 430 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride3' @@ -287,11 +287,11 @@ ; AVX-LABEL: 'replication_i8_stride4' ; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 338 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 676 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride4' @@ -397,12 +397,12 @@ ; ; AVX-LABEL: 'replication_i8_stride5' ; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride5' @@ -417,12 +417,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride5' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 720 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1440 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 355 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 710 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1420 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride5' @@ -508,12 +508,12 @@ ; ; AVX-LABEL: 'replication_i8_stride6' ; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 474 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 948 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride6' @@ -528,12 +528,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride6' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 431 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 862 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1724 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 425 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 850 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1700 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride6' @@ -619,12 +619,12 @@ ; ; AVX-LABEL: 'replication_i8_stride7' ; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 542 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride7' @@ -639,12 +639,12 @@ ; ; AVX512FVEC256-LABEL: 'replication_i8_stride7' ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 502 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1004 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 495 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 990 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1980 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMIVEC512-LABEL: 'replication_i8_stride7' @@ -730,12 +730,12 @@ ; ; AVX-LABEL: 'replication_i8_stride8' ; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i8> undef, <2 x i8> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 305 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 610 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1220 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1188 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512FVEC512-LABEL: 'replication_i8_stride8' diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -358,8 +358,8 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cvt_i64_f32 = sitofp i64 undef to float ; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %cvt_v4i64_v4f32 = sitofp <4 x i64> undef to <4 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX512DQ-LABEL: 'sitofp_i64_float' diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -315,29 +315,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -347,29 +347,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -382,29 +382,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -414,29 +414,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -516,29 +516,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -548,29 +548,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -650,29 +650,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -682,29 +682,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -784,29 +784,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -816,29 +816,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -851,29 +851,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V6i64 = trunc <6 x i64> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V7i64 = trunc <7 x i64> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = trunc <8 x i64> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i16> @@ -883,29 +883,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6i32 = trunc <6 x i32> undef to <6 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7i32 = trunc <7 x i32> undef to <7 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8i32 = trunc <8 x i32> undef to <8 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i16> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1295,29 +1295,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1328,29 +1328,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1361,29 +1361,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1397,29 +1397,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1430,29 +1430,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1463,29 +1463,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1601,29 +1601,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1634,29 +1634,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1667,29 +1667,29 @@ ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -1805,29 +1805,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -1838,29 +1838,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -1871,29 +1871,29 @@ ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2009,29 +2009,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2042,29 +2042,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2075,29 +2075,29 @@ ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2111,29 +2111,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 328 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 656 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1312 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i8> @@ -2144,29 +2144,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i8 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i8> @@ -2177,29 +2177,29 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i8> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; @@ -2751,28 +2751,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2786,28 +2786,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2821,28 +2821,28 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -2894,28 +2894,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -2929,28 +2929,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -2964,28 +2964,28 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> @@ -3895,28 +3895,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i64 = trunc <16 x i64> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V32i64 = trunc <32 x i64> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V64i64 = trunc <64 x i64> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V128i64 = trunc <128 x i64> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %V256i64 = trunc <256 x i64> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 368 for instruction: %V512i64 = trunc <512 x i64> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 736 for instruction: %V1024i64 = trunc <1024 x i64> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i32 = trunc i32 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = trunc <2 x i32> undef to <2 x i1> @@ -3930,28 +3930,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i32 = trunc <16 x i32> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i32 = trunc <32 x i32> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i32 = trunc <64 x i32> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V128i32 = trunc <128 x i32> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %V256i32 = trunc <256 x i32> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %V512i32 = trunc <512 x i32> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 672 for instruction: %V1024i32 = trunc <1024 x i32> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i16 = trunc i16 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i16 = trunc <2 x i16> undef to <2 x i1> @@ -3965,28 +3965,28 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12i16 = trunc <12 x i16> undef to <12 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14i16 = trunc <14 x i16> undef to <14 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16i16 = trunc <16 x i16> undef to <16 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32i16 = trunc <32 x i16> undef to <32 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64i16 = trunc <64 x i16> undef to <64 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V128i16 = trunc <128 x i16> undef to <128 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %V256i16 = trunc <256 x i16> undef to <256 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %V512i16 = trunc <512 x i16> undef to <512 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %V1024i16 = trunc <1024 x i16> undef to <1024 x i1> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %i8 = trunc i8 undef to i1 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i8 = trunc <2 x i8> undef to <2 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -121,7 +121,7 @@ ;; However, we should not take unconsecutive loads of pointers into account. ; CHECK: test_nonconsecutive_ptr_load ; CHECK: LV: The Smallest and Widest types: 16 / 64 bits. -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Selecting VF: 4 define void @test_nonconsecutive_ptr_load() nounwind ssp uwtable { br label %1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -4,32 +4,20 @@ define i32 @foo(i32* nocapture readonly %arr, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, i32 %a7, i32 %a8) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]] -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP0]], [[A2:%.*]] -; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]] -; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]] -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]] -; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP0]], [[A6:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARR]], align 4 -; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]] -; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP1]], [[A8:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]] -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]] -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]] -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]] -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]] -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]] -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]] -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]] -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]] -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]] -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]] -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]] -; CHECK-NEXT: ret i32 [[COND44]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]]) +; CHECK-NEXT: ret i32 [[TMP11]] ; entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 1