diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -4422,30 +4422,45 @@ // Case#3: inserting into 4,5,6,7 index needs 4*vpinsrd + inserti128. const int CostValue = *LT.first.getValue(); assert(CostValue >= 0 && "Negative cost!"); - unsigned Num128Lanes = SizeInBits / 128 * CostValue; + assert((SizeInBits % 128) == 0 && "Illegal vector"); + unsigned NumLegal128Lanes = SizeInBits / 128; + unsigned Num128Lanes = NumLegal128Lanes * CostValue; unsigned NumElts = LT.second.getVectorNumElements() * CostValue; APInt WidenedDemandedElts = DemandedElts.zext(NumElts); + assert((NumElts % Num128Lanes) == 0 && "Unexpected elts per lane"); unsigned Scale = NumElts / Num128Lanes; + + auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale); + // We iterate each 128-lane, and check if we need a // extracti128/inserti128 for this 128-lane. - for (unsigned I = 0; I < NumElts; I += Scale) { - APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale); - APInt MaskedDE = Mask & WidenedDemandedElts; - unsigned Population = MaskedDE.countPopulation(); - Cost += (Population > 0 && Population != Scale && - I % LT.second.getVectorNumElements() != 0); - Cost += Population > 0; + for (unsigned I = 0; I != Num128Lanes; ++I) { + APInt LaneEltMask = WidenedDemandedElts.extractBits(Scale, Scale * I); + if (LaneEltMask.isNullValue()) + continue; + if (!LaneEltMask.isAllOnes()) + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, + I * Scale, Ty128); + Cost += BaseT::getScalarizationOverhead(Ty128, LaneEltMask, Insert, + false); } - Cost += DemandedElts.countPopulation(); - // For vXf32 cases, insertion into the 0'th index in each v4f32 - // 128-bit vector is free. - // NOTE: This assumes legalization widens vXf32 vectors. - if (MScalarTy == MVT::f32) - for (unsigned i = 0, e = cast(Ty)->getNumElements(); - i < e; i += 4) - if (DemandedElts[i]) - Cost--; + APInt AffectedLanes = + APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); + APInt FullyAffectedLegalVectors = APIntOps::ScaleBitMask( + AffectedLanes, CostValue, /*MatchAllBits=*/true); + for (int LegalVec = 0; LegalVec != CostValue; ++LegalVec) { + for (unsigned Lane = 0; Lane != NumLegal128Lanes; ++Lane) { + unsigned I = NumLegal128Lanes * LegalVec + Lane; + // No need to insert unaffected lane; or lane 0 of each legal vector + // iff ALL lanes of that vector were affected and will be inserted. + if (!AffectedLanes[I] || + (Lane == 0 && FullyAffectedLegalVectors[LegalVec])) + continue; + Cost += getShuffleCost(TTI::SK_InsertSubvector, Ty, None, CostKind, + I * Scale, Ty128); + } + } } } else if (LT.second.isVector()) { // Without fast insertion, we need to use MOVD/MOVQ to pass each demanded @@ -4484,32 +4499,28 @@ assert(NumElts >= DemandedElts.getBitWidth() && "Vector has been legalized to smaller element count"); - // If we're extracting elements from a 128-bit subvector lane, we only need - // to extract each lane once, not for every element. + // If we're extracting elements from a 128-bit subvector lane, + // we only need to extract each lane once, not for every element. if (SizeInBits > 128) { assert((SizeInBits % 128) == 0 && "Illegal vector"); unsigned NumLegal128Lanes = SizeInBits / 128; unsigned Num128Lanes = NumLegal128Lanes * CostValue; APInt WidenedDemandedElts = DemandedElts.zext(NumElts); + assert((NumElts % Num128Lanes) == 0 && "Unexpected elts per lane"); unsigned Scale = NumElts / Num128Lanes; // Add cost for each demanded 128-bit subvector extraction. // Luckily this is a lot easier than for insertion. - APInt DemandedUpper128Lanes = - APIntOps::ScaleBitMask(WidenedDemandedElts, Num128Lanes); auto *Ty128 = FixedVectorType::get(Ty->getElementType(), Scale); - for (unsigned I = 0; I != Num128Lanes; ++I) - if (DemandedUpper128Lanes[I]) - Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, - I * Scale, Ty128); - - // Add all the demanded element extractions together, but adjust the - // index to use the equivalent of the bottom 128 bit lane. - for (unsigned I = 0; I != NumElts; ++I) - if (WidenedDemandedElts[I]) { - unsigned Idx = I % Scale; - Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, Idx); - } + for (unsigned I = 0; I != Num128Lanes; ++I) { + APInt LaneEltMask = WidenedDemandedElts.extractBits(Scale, I * Scale); + if (LaneEltMask.isNullValue()) + continue; + Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, CostKind, + I * Scale, Ty128); + Cost += BaseT::getScalarizationOverhead(Ty128, LaneEltMask, false, + Extract); + } return Cost; } diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-codesize.ll @@ -587,2 +587,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-latency.ll @@ -961,2 +961,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp-sizelatency.ll @@ -895,2 +895,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/arith-fp.ll b/llvm/test/Analysis/CostModel/X86/arith-fp.ll --- a/llvm/test/Analysis/CostModel/X86/arith-fp.ll +++ b/llvm/test/Analysis/CostModel/X86/arith-fp.ll @@ -663,2 +663,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = frem <16 x float> undef, undef @@ -674,2 +674,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8F32 = frem <8 x float> undef, undef -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16F32 = frem <16 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = frem <8 x float> undef, undef +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16F32 = frem <16 x float> undef, undef @@ -1038,2 +1038,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-codesize.ll @@ -253 +253 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -257 +257 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -261 +261 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -302 +302 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -306 +306 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -310 +310 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -400 +400 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -404 +404 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -408 +408 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -449 +449 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -453 +453 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -457 +457 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -535 +535 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -539 +539 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -543 +543 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -584 +584 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -588 +588 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -592 +592 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-latency.ll @@ -253 +253 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -257 +257 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -261 +261 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -302 +302 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -306 +306 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -310 +310 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -400 +400 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -404 +404 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -408 +408 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -449 +449 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -453 +453 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -457 +457 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -535 +535 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -539 +539 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -543 +543 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -584 +584 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -588 +588 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -592 +592 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) diff --git a/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll --- a/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll +++ b/llvm/test/Analysis/CostModel/X86/bitreverse-sizelatency.ll @@ -253 +253 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -257 +257 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -261 +261 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %bitreverse = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a) @@ -302 +302 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -306 +306 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -310 +310 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %bitreverse = call <8 x i64> @llvm.bitreverse.v8i64(<8 x i64> %a) @@ -400 +400 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -404 +404 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -408 +408 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %bitreverse = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a) @@ -449 +449 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -453 +453 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -457 +457 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %bitreverse = call <16 x i32> @llvm.bitreverse.v16i32(<16 x i32> %a) @@ -535 +535 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -539 +539 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -543 +543 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %bitreverse = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a) @@ -584 +584 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX1-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -588 +588 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX2-NEXT: Cost Model: Found an estimated cost of 388 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) @@ -592 +592 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) +; AVX512-NEXT: Cost Model: Found an estimated cost of 390 for instruction: %bitreverse = call <32 x i16> @llvm.bitreverse.v32i16(<32 x i16> %a) diff --git a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll --- a/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fmaxnum-size-latency.ll @@ -18,2 +18,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll --- a/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll +++ b/llvm/test/Analysis/CostModel/X86/fminnum-size-latency.ll @@ -18,2 +18,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V8F32 = call <8 x float> @llvm.minnum.v8f32(<8 x float> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V16F32 = call <16 x float> @llvm.minnum.v16f32(<16 x float> undef, <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll --- a/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoi_sat.ll @@ -269,2 +269,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) @@ -279,2 +279,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) @@ -289,2 +289,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) @@ -299,2 +299,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) @@ -309,2 +309,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) @@ -319,2 +319,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) @@ -372,2 +372,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) @@ -382,2 +382,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) @@ -392,2 +392,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) @@ -402,2 +402,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) @@ -412,2 +412,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) @@ -422,2 +422,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) @@ -475,2 +475,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) @@ -485,2 +485,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v4f64s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f64(<4 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v4f64u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f64(<4 x double> undef) @@ -495,2 +495,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) @@ -505,2 +505,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v8f64s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v8f64u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> undef) @@ -515,2 +515,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) @@ -525,2 +525,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f64s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f64(<16 x double> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v16f64u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f64(<16 x double> undef) @@ -988,6 +988,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) @@ -998,6 +998,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 181 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) @@ -1037,2 +1037,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) @@ -1041,8 +1041,8 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) @@ -1051,8 +1051,8 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; AVX512F-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512F-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) @@ -1090,2 +1090,2 @@ -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) @@ -1094,8 +1094,8 @@ -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) @@ -1104,8 +1104,8 @@ -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 211 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 209 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 185 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) diff --git a/llvm/test/Analysis/CostModel/X86/fptosi.ll b/llvm/test/Analysis/CostModel/X86/fptosi.ll --- a/llvm/test/Analysis/CostModel/X86/fptosi.ll +++ b/llvm/test/Analysis/CostModel/X86/fptosi.ll @@ -31,2 +31,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> @@ -38,2 +38,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x double> undef to <8 x i64> @@ -219,3 +219,3 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> @@ -227,3 +227,3 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptosi <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptosi <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptosi <16 x float> undef to <16 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/fptoui.ll b/llvm/test/Analysis/CostModel/X86/fptoui.ll --- a/llvm/test/Analysis/CostModel/X86/fptoui.ll +++ b/llvm/test/Analysis/CostModel/X86/fptoui.ll @@ -31,2 +31,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> @@ -38,2 +38,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> @@ -45,2 +45,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x double> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x double> undef to <8 x i64> @@ -226,3 +226,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> @@ -234,3 +234,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> @@ -242,3 +242,3 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = fptoui <4 x float> undef to <4 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = fptoui <8 x float> undef to <8 x i64> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16I64 = fptoui <16 x float> undef to <16 x i64> diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll @@ -31,2 +31,2 @@ -; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX1: LV: Found an estimated cost of 193 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 @@ -39,2 +39,2 @@ -; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 33 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 @@ -47,2 +47,2 @@ -; AVX2-FASTGATHER: LV: Found an estimated cost of 54 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX2-FASTGATHER: LV: Found an estimated cost of 108 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 53 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 @@ -55,3 +55,3 @@ -; AVX512: LV: Found an estimated cost of 56 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 112 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 -; AVX512: LV: Found an estimated cost of 224 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 55 for VF 16 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 111 for VF 32 For instruction: %valB = load i16, i16* %inB, align 2 +; AVX512: LV: Found an estimated cost of 222 for VF 64 For instruction: %valB = load i16, i16* %inB, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll @@ -37,3 +37,3 @@ -; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 97 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 194 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX1: LV: Found an estimated cost of 388 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 @@ -45,3 +45,3 @@ -; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 17 for VF 8 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 34 for VF 16 For instruction: %valB = load i32, i32* %inB, align 4 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 68 for VF 32 For instruction: %valB = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll @@ -36,4 +36,4 @@ -; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX1: LV: Found an estimated cost of 400 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 49 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 98 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 196 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX1: LV: Found an estimated cost of 392 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 @@ -44,4 +44,4 @@ -; AVX2-SLOWGATHER: LV: Found an estimated cost of 10 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 20 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 40 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 -; AVX2-SLOWGATHER: LV: Found an estimated cost of 80 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 9 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 18 for VF 8 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 36 for VF 16 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 72 for VF 32 For instruction: %valB = load i64, i64* %inB, align 8 @@ -60 +60 @@ -; AVX512: LV: Found an estimated cost of 15 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 14 for VF 4 For instruction: %valB = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll @@ -39 +39 @@ -; AVX1: LV: Found an estimated cost of 386 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX1: LV: Found an estimated cost of 385 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 @@ -47 +47 @@ -; AVX2-SLOWGATHER: LV: Found an estimated cost of 66 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-SLOWGATHER: LV: Found an estimated cost of 65 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 @@ -55 +55 @@ -; AVX2-FASTGATHER: LV: Found an estimated cost of 106 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX2-FASTGATHER: LV: Found an estimated cost of 105 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 @@ -63,2 +63,2 @@ -; AVX512: LV: Found an estimated cost of 110 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 -; AVX512: LV: Found an estimated cost of 220 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 109 for VF 32 For instruction: %valB = load i8, i8* %inB, align 1 +; AVX512: LV: Found an estimated cost of 219 for VF 64 For instruction: %valB = load i8, i8* %inB, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 @@ -32,2 +32,2 @@ -; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 @@ -32,2 +32,2 @@ -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 @@ -33 +33 @@ -; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -36,2 +36,2 @@ -; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -44,3 +44,3 @@ -; AVX512DQ: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 175 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -36,2 +36,2 @@ -; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -44,3 +44,3 @@ -; AVX512DQ: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 245 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -36,2 +36,2 @@ -; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 @@ -44,3 +44,3 @@ -; AVX512DQ: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 -; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2 +; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: %v0 = load i16, ptr %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 22 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 44 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 88 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 21 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 24 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 23 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 46 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 92 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 62 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 124 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 248 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 59 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 118 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 236 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 84 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 168 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 26 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 52 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 -; AVX1: LV: Found an estimated cost of 104 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 25 for VF 8 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 50 for VF 16 For instruction: %v0 = load i32, i32* %in0, align 4 +; AVX1: LV: Found an estimated cost of 100 for VF 32 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -26,3 +26,3 @@ -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 @@ -32,2 +32,2 @@ -; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 @@ -32,2 +32,2 @@ -; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll @@ -25,2 +25,2 @@ -; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 @@ -33 +33 @@ -; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 +; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -25,4 +25,4 @@ -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll @@ -24,3 +24,3 @@ -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -24,3 +24,3 @@ -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll @@ -23,2 +23,2 @@ -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -29,2 +29,2 @@ -; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -23,2 +23,2 @@ -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll @@ -23,2 +23,2 @@ -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -29,2 +29,2 @@ -; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll @@ -23,2 +23,2 @@ -; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 @@ -29,2 +29,2 @@ -; AVX2: LV: Found an estimated cost of 80 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 -; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8 +; AVX2: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -37 +37 @@ -; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 325 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -45,2 +45,2 @@ -; AVX512DQ: LV: Found an estimated cost of 340 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 335 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -37 +37 @@ -; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 455 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -45,2 +45,2 @@ -; AVX512DQ: LV: Found an estimated cost of 476 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 469 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll @@ -29 +29 @@ -; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -37 +37 @@ -; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX2: LV: Found an estimated cost of 520 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 @@ -45,2 +45,2 @@ -; AVX512DQ: LV: Found an estimated cost of 544 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 -; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1 +; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: %v0 = load i8, ptr %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll @@ -25,4 +25,4 @@ -; AVX1: LV: Found an estimated cost of 15 for VF 4 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 32 for VF 8 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 16 For instruction: store float %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 32 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 14 for VF 4 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 30 for VF 8 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 16 For instruction: store float %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 32 For instruction: store float %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -24,5 +24,5 @@ -; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 23 for VF 4 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store float %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 45 for VF 8 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 16 For instruction: store float %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 32 For instruction: store float %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -24,5 +24,5 @@ -; AVX1: LV: Found an estimated cost of 13 for VF 2 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 64 for VF 8 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 16 For instruction: store float %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 32 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store float %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store float %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-5.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 17 for VF 2 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 38 for VF 4 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: store float %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 16 for VF 2 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 36 for VF 4 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: store float %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: store float %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store float %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 42 for VF 4 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 90 for VF 8 For instruction: store float %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store float %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-7.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 53 for VF 4 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: store float %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 50 for VF 4 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: store float %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: store float %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-8.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: store float %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store float %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: store float %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll @@ -27,3 +27,3 @@ -; AVX1: LV: Found an estimated cost of 35 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 72 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 8 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 70 for VF 16 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 32 For instruction: store i16 %v1, ptr %out1, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 288 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 +; AVX512DQ: LV: Found an estimated cost of 284 for VF 64 For instruction: store i16 %v1, ptr %out1, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 30 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 108 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 52 for VF 8 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 105 for VF 16 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 32 For instruction: store i16 %v2, ptr %out2, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 432 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 +; AVX512DQ: LV: Found an estimated cost of 426 for VF 64 For instruction: store i16 %v2, ptr %out2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 35 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 70 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 4 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 8 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 140 for VF 16 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 32 For instruction: store i16 %v3, ptr %out3, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 576 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 64 For instruction: store i16 %v3, ptr %out3, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll @@ -25,5 +25,5 @@ -; AVX1: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX1: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX1: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 @@ -33,5 +33,5 @@ -; AVX2: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 88 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 180 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX2: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 44 for VF 4 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 175 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX2: LV: Found an estimated cost of 350 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 @@ -41 +41 @@ -; AVX512DQ: LV: Found an estimated cost of 27 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v4, ptr %out4, align 2 @@ -43,4 +43,4 @@ -; AVX512DQ: LV: Found an estimated cost of 87 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 178 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 360 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 -; AVX512DQ: LV: Found an estimated cost of 720 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 86 for VF 8 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 176 for VF 16 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 355 for VF 32 For instruction: store i16 %v4, ptr %out4, align 2 +; AVX512DQ: LV: Found an estimated cost of 710 for VF 64 For instruction: store i16 %v4, ptr %out4, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -25,5 +25,5 @@ -; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 105 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 216 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 -; AVX1: LV: Found an estimated cost of 432 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 52 for VF 4 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 102 for VF 8 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 210 for VF 16 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX1: LV: Found an estimated cost of 420 for VF 32 For instruction: store i16 %v5, ptr %out5, align 2 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 864 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 +; AVX512DQ: LV: Found an estimated cost of 852 for VF 64 For instruction: store i16 %v5, ptr %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-7.ll @@ -25,5 +25,5 @@ -; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX1: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX1: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 @@ -33,5 +33,5 @@ -; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 252 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX2: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 245 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX2: LV: Found an estimated cost of 490 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 @@ -41,6 +41,6 @@ -; AVX512DQ: LV: Found an estimated cost of 36 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 123 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 249 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 504 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 -; AVX512DQ: LV: Found an estimated cost of 1008 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 65 for VF 4 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 246 for VF 16 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 497 for VF 32 For instruction: store i16 %v6, ptr %out6, align 2 +; AVX512DQ: LV: Found an estimated cost of 994 for VF 64 For instruction: store i16 %v6, ptr %out6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-8.ll @@ -25,5 +25,5 @@ -; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX1: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX1: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 @@ -33,5 +33,5 @@ -; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 70 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 288 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX2: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX2: LV: Found an estimated cost of 560 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 @@ -41,6 +41,6 @@ -; AVX512DQ: LV: Found an estimated cost of 35 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 69 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 138 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 284 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 576 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 -; AVX512DQ: LV: Found an estimated cost of 1152 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 34 for VF 2 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 68 for VF 4 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 136 for VF 8 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 280 for VF 16 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 568 for VF 32 For instruction: store i16 %v7, ptr %out7, align 2 +; AVX512DQ: LV: Found an estimated cost of 1136 for VF 64 For instruction: store i16 %v7, ptr %out7, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll @@ -25,4 +25,4 @@ -; AVX1: LV: Found an estimated cost of 19 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 40 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 4 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 38 for VF 8 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 16 For instruction: store i32 %v1, ptr %out1, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 32 For instruction: store i32 %v1, ptr %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -24,5 +24,5 @@ -; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 29 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 60 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 4 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 57 for VF 8 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 16 For instruction: store i32 %v2, ptr %out2, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 32 For instruction: store i32 %v2, ptr %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -24,5 +24,5 @@ -; AVX1: LV: Found an estimated cost of 19 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 38 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 80 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 18 for VF 2 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 76 for VF 8 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 16 For instruction: store i32 %v3, ptr %out3, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 32 For instruction: store i32 %v3, ptr %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-5.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX1: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX1: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 48 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 100 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 -; AVX2: LV: Found an estimated cost of 200 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 95 for VF 8 For instruction: store i32 %v4, ptr %out4, align 4 +; AVX2: LV: Found an estimated cost of 190 for VF 16 For instruction: store i32 %v4, ptr %out4, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 57 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 -; AVX1: LV: Found an estimated cost of 240 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 114 for VF 8 For instruction: store i32 %v5, ptr %out5, align 4 +; AVX1: LV: Found an estimated cost of 228 for VF 16 For instruction: store i32 %v5, ptr %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-7.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX1: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX1: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 37 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 140 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 -; AVX2: LV: Found an estimated cost of 280 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 35 for VF 2 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 133 for VF 8 For instruction: store i32 %v6, ptr %out6, align 4 +; AVX2: LV: Found an estimated cost of 266 for VF 16 For instruction: store i32 %v6, ptr %out6, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-8.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX1: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX1: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 @@ -30,4 +30,4 @@ -; AVX2: LV: Found an estimated cost of 38 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 76 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 160 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 -; AVX2: LV: Found an estimated cost of 320 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 152 for VF 8 For instruction: store i32 %v7, ptr %out7, align 4 +; AVX2: LV: Found an estimated cost of 304 for VF 16 For instruction: store i32 %v7, ptr %out7, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -24,5 +24,5 @@ -; AVX1: LV: Found an estimated cost of 11 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 24 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 10 for VF 2 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 22 for VF 4 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 32 For instruction: store i64 %v1, ptr %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 17 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 36 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 16 for VF 2 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 33 for VF 4 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -23,4 +23,4 @@ -; AVX1: LV: Found an estimated cost of 22 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 48 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 20 for VF 2 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 44 for VF 4 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-5.ll @@ -22,3 +22,3 @@ -; AVX1: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX1: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX1: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 @@ -28,3 +28,3 @@ -; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 60 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 -; AVX2: LV: Found an estimated cost of 120 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 55 for VF 4 For instruction: store i64 %v4, ptr %out4, align 8 +; AVX2: LV: Found an estimated cost of 110 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll @@ -22,3 +22,3 @@ -; AVX1: LV: Found an estimated cost of 33 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 72 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 -; AVX1: LV: Found an estimated cost of 144 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 30 for VF 2 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-7.ll @@ -22,3 +22,3 @@ -; AVX1: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX1: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX1: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 @@ -28,3 +28,3 @@ -; AVX2: LV: Found an estimated cost of 39 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 84 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 -; AVX2: LV: Found an estimated cost of 168 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 36 for VF 2 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 77 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8 +; AVX2: LV: Found an estimated cost of 154 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll @@ -22,3 +22,3 @@ -; AVX1: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX1: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX1: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 @@ -28,3 +28,3 @@ -; AVX2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 96 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 -; AVX2: LV: Found an estimated cost of 192 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 40 for VF 2 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 88 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8 +; AVX2: LV: Found an estimated cost of 176 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll @@ -28,2 +28,2 @@ -; AVX1: LV: Found an estimated cost of 67 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 -; AVX1: LV: Found an estimated cost of 136 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 16 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX1: LV: Found an estimated cost of 134 for VF 32 For instruction: store i8 %v1, ptr %out1, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 272 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 +; AVX512DQ: LV: Found an estimated cost of 270 for VF 64 For instruction: store i8 %v1, ptr %out1, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll @@ -27,3 +27,3 @@ -; AVX1: LV: Found an estimated cost of 54 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 101 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 -; AVX1: LV: Found an estimated cost of 204 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 8 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 100 for VF 16 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX1: LV: Found an estimated cost of 201 for VF 32 For instruction: store i8 %v2, ptr %out2, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 408 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 +; AVX512DQ: LV: Found an estimated cost of 405 for VF 64 For instruction: store i8 %v2, ptr %out2, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll @@ -27,3 +27,3 @@ -; AVX1: LV: Found an estimated cost of 67 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 134 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 -; AVX1: LV: Found an estimated cost of 272 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 8 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 16 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX1: LV: Found an estimated cost of 268 for VF 32 For instruction: store i8 %v3, ptr %out3, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 544 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 +; AVX512DQ: LV: Found an estimated cost of 540 for VF 64 For instruction: store i8 %v3, ptr %out3, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-5.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX1: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX1: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 @@ -34,4 +34,4 @@ -; AVX2: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 168 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX2: LV: Found an estimated cost of 340 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 84 for VF 8 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX2: LV: Found an estimated cost of 335 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 @@ -42 +42 @@ -; AVX512DQ: LV: Found an estimated cost of 47 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 46 for VF 4 For instruction: store i8 %v4, ptr %out4, align 1 @@ -44,3 +44,3 @@ -; AVX512DQ: LV: Found an estimated cost of 167 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 338 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 -; AVX512DQ: LV: Found an estimated cost of 680 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 166 for VF 16 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 336 for VF 32 For instruction: store i8 %v4, ptr %out4, align 1 +; AVX512DQ: LV: Found an estimated cost of 675 for VF 64 For instruction: store i8 %v4, ptr %out4, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 54 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 101 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 201 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 -; AVX1: LV: Found an estimated cost of 408 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 53 for VF 4 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 100 for VF 8 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 198 for VF 16 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX1: LV: Found an estimated cost of 402 for VF 32 For instruction: store i8 %v5, ptr %out5, align 1 @@ -46 +46 @@ -; AVX512DQ: LV: Found an estimated cost of 816 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 +; AVX512DQ: LV: Found an estimated cost of 810 for VF 64 For instruction: store i8 %v5, ptr %out5, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-7.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX1: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX1: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 @@ -34,4 +34,4 @@ -; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX2: LV: Found an estimated cost of 476 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 232 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX2: LV: Found an estimated cost of 469 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 @@ -42,5 +42,5 @@ -; AVX512DQ: LV: Found an estimated cost of 64 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 122 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 235 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 473 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 -; AVX512DQ: LV: Found an estimated cost of 952 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 63 for VF 4 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 121 for VF 8 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 234 for VF 16 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 470 for VF 32 For instruction: store i8 %v6, ptr %out6, align 1 +; AVX512DQ: LV: Found an estimated cost of 945 for VF 64 For instruction: store i8 %v6, ptr %out6, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-8.ll @@ -26,4 +26,4 @@ -; AVX1: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX1: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX1: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 @@ -34,4 +34,4 @@ -; AVX2: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 134 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 268 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX2: LV: Found an estimated cost of 544 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX2: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 @@ -42,5 +42,5 @@ -; AVX512DQ: LV: Found an estimated cost of 67 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 133 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 266 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 540 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 -; AVX512DQ: LV: Found an estimated cost of 1088 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 66 for VF 4 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 132 for VF 8 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 264 for VF 16 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 536 for VF 32 For instruction: store i8 %v7, ptr %out7, align 1 +; AVX512DQ: LV: Found an estimated cost of 1080 for VF 64 For instruction: store i8 %v7, ptr %out7, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll @@ -53 +53 @@ -; AVX512: LV: Found an estimated cost of 23 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 +; AVX512: LV: Found an estimated cost of 22 for VF 4 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -31,2 +31,2 @@ -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 -; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 56 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 +; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 55 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -179,2 +179,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -183,2 +183,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -238,2 +238,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -242,2 +242,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -793,2 +793,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) @@ -797,2 +797,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -801,2 +801,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) @@ -805,2 +805,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -809,2 +809,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -820,2 +820,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) @@ -824,2 +824,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -828,2 +828,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) @@ -832,2 +832,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -836,2 +836,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -859,2 +859,2 @@ -; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -863,2 +863,2 @@ -; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -879 +879 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -886,2 +886,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -890,2 +890,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -913,2 +913,2 @@ -; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -917,2 +917,2 @@ -; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -1185,2 +1185,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) @@ -1189,2 +1189,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) @@ -1193,2 +1193,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) @@ -1197,2 +1197,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) @@ -1201,2 +1201,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) @@ -1212,2 +1212,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) @@ -1216,2 +1216,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) @@ -1220,2 +1220,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) @@ -1224,2 +1224,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) @@ -1228,2 +1228,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) @@ -1792 +1792 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) @@ -1798 +1798 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) @@ -1836 +1836 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1842 +1842 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1880 +1880 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1886 +1886 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1930 +1930 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) @@ -1938 +1938 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -179,2 +179,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -183,2 +183,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -238,2 +238,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0v16i16(<16 x i16>* undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -242,2 +242,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 263 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -793,2 +793,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) @@ -797,2 +797,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -801,2 +801,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) @@ -805,2 +805,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -809,2 +809,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX1-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -820,2 +820,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> undef, i32 1, <8 x i1> undef, <8 x float> undef) @@ -824,2 +824,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -828,2 +828,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> undef, i32 1, <8 x i1> undef, <8 x i32> undef) @@ -832,2 +832,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -836,2 +836,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; AVX2-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -859,2 +859,2 @@ -; SKL-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -863,2 +863,2 @@ -; SKL-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKL-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -879 +879 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> undef, i32 1, <4 x i1> undef, <4 x i64> undef) @@ -886,2 +886,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -890,2 +890,2 @@ -; KNL-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; KNL-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -913,2 +913,2 @@ -; SKX-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0i16(<32 x i16*> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0i16(<16 x i16*> undef, i32 1, <16 x i1> undef, <16 x i16> undef) @@ -917,2 +917,2 @@ -; SKX-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) -; SKX-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0i8(<64 x i8*> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0i8(<32 x i8*> undef, i32 1, <32 x i1> undef, <32 x i8> undef) @@ -1185,2 +1185,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) @@ -1189,2 +1189,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) @@ -1193,2 +1193,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) @@ -1197,2 +1197,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) @@ -1201,2 +1201,2 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) @@ -1212,2 +1212,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(float* undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(float* undef, <8 x i1> undef, <8 x float> undef) @@ -1216,2 +1216,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(i64* undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(i64* undef, <4 x i1> undef, <4 x i64> undef) @@ -1220,2 +1220,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(i32* undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(i32* undef, <8 x i1> undef, <8 x i32> undef) @@ -1224,2 +1224,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(i16* undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(i16* undef, <16 x i1> undef, <16 x i16> undef) @@ -1228,2 +1228,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) -; AVX512-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(i8* undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(i8* undef, <32 x i1> undef, <32 x i8> undef) @@ -1792 +1792 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) @@ -1798 +1798 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> , <16 x float> undef) @@ -1836 +1836 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1842 +1842 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1880 +1880 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1886 +1886 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) @@ -1930 +1930 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) @@ -1938 +1938 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> , <16 x float> undef) diff --git a/llvm/test/Analysis/CostModel/X86/powi.ll b/llvm/test/Analysis/CostModel/X86/powi.ll --- a/llvm/test/Analysis/CostModel/X86/powi.ll +++ b/llvm/test/Analysis/CostModel/X86/powi.ll @@ -38,2 +38,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) +; AVX2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) @@ -51,2 +51,2 @@ -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) -; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.powi.v8f32.i32(<8 x float> poison, i32 %arg) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.powi.v16f32.i32(<16 x float> poison, i32 %arg) diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i1.ll @@ -79,4 +79,4 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 280 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> @@ -90,4 +90,4 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <256 x i32> @@ -255,5 +255,5 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 208 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> @@ -266,5 +266,5 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <384 x i32> @@ -432,5 +432,5 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 552 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 268 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 536 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> @@ -443,5 +443,5 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <512 x i32> @@ -608,6 +608,6 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 344 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 688 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 334 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 668 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> @@ -619,6 +619,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 684 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <20 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <40 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <80 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 166 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <160 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 332 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <320 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 664 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <640 x i32> @@ -785,6 +785,6 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 412 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 824 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 200 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 400 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 800 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> @@ -796,6 +796,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 205 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 410 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 820 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <24 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <48 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <96 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 199 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <192 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 398 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <384 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 796 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <768 x i32> @@ -962,6 +962,6 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 240 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 480 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 960 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 233 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 466 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 932 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> @@ -973,6 +973,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 239 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 478 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 956 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <28 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <56 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <112 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 232 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <224 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 464 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <448 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 928 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <896 x i32> @@ -1139,6 +1139,6 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 274 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 548 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1096 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 266 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 532 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1064 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> @@ -1150,6 +1150,6 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 137 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 273 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 546 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1092 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i1> undef, <4 x i1> poison, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i1> undef, <8 x i1> poison, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 133 for instruction: %vf16 = shufflevector <16 x i1> undef, <16 x i1> poison, <128 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 265 for instruction: %vf32 = shufflevector <32 x i1> undef, <32 x i1> poison, <256 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 530 for instruction: %vf64 = shufflevector <64 x i1> undef, <64 x i1> poison, <512 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1060 for instruction: %vf128 = shufflevector <128 x i1> undef, <128 x i1> poison, <1024 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i16.ll @@ -69,4 +69,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <128 x i32> @@ -179,5 +179,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 142 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 284 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> @@ -199,5 +199,5 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 228 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 456 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 111 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 222 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 444 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <192 x i32> @@ -290,5 +290,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <256 x i32> @@ -400,6 +400,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 214 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 428 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> @@ -420,6 +420,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 93 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 752 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <10 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 183 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 366 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 732 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <320 x i32> @@ -511,6 +511,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 500 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> @@ -531,6 +531,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 225 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 450 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 900 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <12 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 109 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 219 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 438 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 876 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <384 x i32> @@ -622,6 +622,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 572 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 272 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 544 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> @@ -642,6 +642,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 524 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1048 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <14 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 255 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 510 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1020 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <448 x i32> @@ -733,6 +733,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 161 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 644 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i16> undef, <2 x i16> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i16> undef, <4 x i16> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i16> undef, <8 x i16> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 153 for instruction: %vf16 = shufflevector <16 x i16> undef, <16 x i16> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 306 for instruction: %vf32 = shufflevector <32 x i16> undef, <32 x i16> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 612 for instruction: %vf64 = shufflevector <64 x i16> undef, <64 x i16> poison, <512 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll @@ -55,4 +55,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> @@ -127,5 +127,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 156 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> @@ -200,5 +200,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> @@ -273,5 +273,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 236 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> @@ -346,5 +346,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 138 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 276 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> @@ -419,5 +419,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 79 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 158 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 316 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 288 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> @@ -492,5 +492,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 178 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 356 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -49,4 +49,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> @@ -113,4 +113,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> @@ -177,4 +177,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 116 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> @@ -241,4 +241,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> @@ -305,4 +305,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> @@ -369,4 +369,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> @@ -433,4 +433,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i8.ll @@ -69,4 +69,4 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <256 x i32> @@ -179,5 +179,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 270 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 540 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> @@ -199,5 +199,5 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 218 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 436 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 872 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 430 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 860 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <384 x i32> @@ -290,5 +290,5 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 169 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 338 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 676 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <512 x i32> @@ -400,6 +400,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 203 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 406 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 812 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> @@ -420,6 +420,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 179 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 720 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1440 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <20 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <40 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 177 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <80 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 355 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <160 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 710 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <320 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1420 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <640 x i32> @@ -511,6 +511,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 118 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 237 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 474 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 948 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 115 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> @@ -531,6 +531,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 215 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 431 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 862 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1724 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <24 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 106 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <48 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 212 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <96 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 425 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <192 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 850 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <384 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1700 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <768 x i32> @@ -622,6 +622,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 69 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 271 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 542 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1084 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 264 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 528 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1056 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> @@ -642,6 +642,6 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 127 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 250 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 502 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1004 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 2008 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <28 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <56 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 247 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <112 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 495 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <224 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 990 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <448 x i32> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1980 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <896 x i32> @@ -733,6 +733,6 @@ -; AVX-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 305 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 610 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 1220 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %vf4 = shufflevector <4 x i8> undef, <4 x i8> poison, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i8> undef, <8 x i8> poison, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i8> undef, <16 x i8> poison, <128 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 297 for instruction: %vf32 = shufflevector <32 x i8> undef, <32 x i8> poison, <256 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 594 for instruction: %vf64 = shufflevector <64 x i8> undef, <64 x i8> poison, <512 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1188 for instruction: %vf128 = shufflevector <128 x i8> undef, <128 x i8> poison, <1024 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/sitofp.ll b/llvm/test/Analysis/CostModel/X86/sitofp.ll --- a/llvm/test/Analysis/CostModel/X86/sitofp.ll +++ b/llvm/test/Analysis/CostModel/X86/sitofp.ll @@ -361,2 +361,2 @@ -; AVX512F-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %cvt_v8i64_v8f32 = sitofp <8 x i64> undef to <8 x float> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %cvt_v16i64_v16f32 = sitofp <16 x i64> undef to <16 x float> diff --git a/llvm/test/Analysis/CostModel/X86/trunc.ll b/llvm/test/Analysis/CostModel/X86/trunc.ll --- a/llvm/test/Analysis/CostModel/X86/trunc.ll +++ b/llvm/test/Analysis/CostModel/X86/trunc.ll @@ -318,3 +318,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -322,3 +322,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -326,3 +326,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -330,3 +330,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -334,3 +334,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -338,3 +338,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -350,3 +350,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -354,3 +354,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -358,3 +358,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -362,3 +362,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -366,3 +366,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -370,3 +370,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX1-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX1-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -385,3 +385,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -389,3 +389,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -393,3 +393,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -397,3 +397,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -401,3 +401,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -405,3 +405,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -417,3 +417,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -421,3 +421,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -425,3 +425,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -429,3 +429,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -433,3 +433,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -437,3 +437,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -519,3 +519,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -523,3 +523,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -527,3 +527,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -531,3 +531,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -535,3 +535,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -539,3 +539,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -551,3 +551,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -555,3 +555,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -559,3 +559,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -563,3 +563,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -567,3 +567,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -571,3 +571,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -653,3 +653,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -657,3 +657,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -661,3 +661,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -665,3 +665,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -669,3 +669,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -673,3 +673,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -685,3 +685,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -689,3 +689,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -693,3 +693,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -697,3 +697,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -701,3 +701,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -705,3 +705,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -787,3 +787,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -791,3 +791,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -795,3 +795,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -799,3 +799,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -803,3 +803,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -807,3 +807,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -819,3 +819,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -823,3 +823,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -827,3 +827,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -831,3 +831,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -835,3 +835,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -839,3 +839,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -854,3 +854,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i64 = trunc <10 x i64> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i64 = trunc <12 x i64> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i64 = trunc <14 x i64> undef to <14 x i16> @@ -858,3 +858,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i16> @@ -862,3 +862,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i16> @@ -866,3 +866,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i16> @@ -870,3 +870,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i16> @@ -874,3 +874,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i16> @@ -886,3 +886,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V10i32 = trunc <10 x i32> undef to <10 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V12i32 = trunc <12 x i32> undef to <12 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V14i32 = trunc <14 x i32> undef to <14 x i16> @@ -890,3 +890,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i16> @@ -894,3 +894,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 105 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i16> @@ -898,3 +898,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 175 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 210 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 245 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 170 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 238 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i16> @@ -902,3 +902,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 350 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 490 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 340 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 408 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 476 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i16> @@ -906,3 +906,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 700 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 840 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 980 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 680 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 816 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i16> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 952 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i16> @@ -1298,3 +1298,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -1302,3 +1302,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -1306,3 +1306,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -1310,3 +1310,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -1314,3 +1314,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -1318,3 +1318,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -1331,3 +1331,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -1335,3 +1335,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -1339,3 +1339,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -1343,3 +1343,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -1347,3 +1347,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -1351,3 +1351,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -1364,3 +1364,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -1368,3 +1368,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -1372,3 +1372,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -1376,3 +1376,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -1380,3 +1380,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -1384,3 +1384,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -1400,3 +1400,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -1404,3 +1404,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -1408,3 +1408,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -1412,3 +1412,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -1416,3 +1416,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -1420,3 +1420,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -1433,3 +1433,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -1437,3 +1437,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -1441,3 +1441,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -1445,3 +1445,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -1449,3 +1449,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -1453,3 +1453,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -1466,3 +1466,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -1470,3 +1470,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -1474,3 +1474,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -1478,3 +1478,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -1482,3 +1482,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -1486,3 +1486,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -1604,3 +1604,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -1608,3 +1608,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -1612,3 +1612,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -1616,3 +1616,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -1620,3 +1620,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -1624,3 +1624,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -1637,3 +1637,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -1641,3 +1641,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -1645,3 +1645,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -1649,3 +1649,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -1653,3 +1653,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -1657,3 +1657,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -1670,3 +1670,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -1674,3 +1674,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -1678,3 +1678,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -1682,3 +1682,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -1686,3 +1686,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -1690,3 +1690,3 @@ -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512FVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -1808,3 +1808,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -1812,3 +1812,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -1816,3 +1816,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -1820,3 +1820,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -1824,3 +1824,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -1828,3 +1828,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -1841,3 +1841,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -1845,3 +1845,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -1849,3 +1849,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -1853,3 +1853,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -1857,3 +1857,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -1861,3 +1861,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -1874,3 +1874,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -1878,3 +1878,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -1882,3 +1882,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -1886,3 +1886,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -1890,3 +1890,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -1894,3 +1894,3 @@ -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512DQVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -2012,3 +2012,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -2016,3 +2016,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -2020,3 +2020,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -2024,3 +2024,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -2028,3 +2028,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -2032,3 +2032,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -2045,3 +2045,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -2049,3 +2049,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -2053,3 +2053,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -2057,3 +2057,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -2061,3 +2061,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -2065,3 +2065,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -2078,3 +2078,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -2082,3 +2082,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -2086,3 +2086,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -2090,3 +2090,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -2094,3 +2094,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -2098,3 +2098,3 @@ -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; AVX512BWVEC256-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -2114,3 +2114,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i8> @@ -2118,3 +2118,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i8> @@ -2122,3 +2122,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i64 = trunc <96 x i64> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i8> @@ -2126,3 +2126,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i8> @@ -2130,3 +2130,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i8> @@ -2134,3 +2134,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i8> @@ -2147,3 +2147,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i8> @@ -2151,3 +2151,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i8> @@ -2155,3 +2155,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i32 = trunc <96 x i32> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i8> @@ -2159,3 +2159,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i8> @@ -2163,3 +2163,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i8> @@ -2167,3 +2167,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i8> @@ -2180,3 +2180,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i8> @@ -2184,3 +2184,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i8> @@ -2188,3 +2188,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 201 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V96i16 = trunc <96 x i16> undef to <96 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i8> @@ -2192,3 +2192,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i8> @@ -2196,3 +2196,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i8> @@ -2200,3 +2200,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i8> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i8> @@ -2754,3 +2754,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> @@ -2758,3 +2758,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> @@ -2762,2 +2762,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> @@ -2765,3 +2765,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> @@ -2769,3 +2769,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> @@ -2773,3 +2773,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> @@ -2789,3 +2789,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> @@ -2793,3 +2793,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> @@ -2797,2 +2797,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> @@ -2800,3 +2800,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> @@ -2804,3 +2804,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> @@ -2808,3 +2808,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> @@ -2824,3 +2824,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> @@ -2828,3 +2828,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> @@ -2832,2 +2832,2 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> @@ -2835,3 +2835,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> @@ -2839,3 +2839,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> @@ -2843,3 +2843,3 @@ -; AVX1-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX1-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> @@ -2897,3 +2897,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> @@ -2901,3 +2901,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> @@ -2905,2 +2905,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> @@ -2908,3 +2908,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> @@ -2912,3 +2912,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> @@ -2916,3 +2916,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> @@ -2932,3 +2932,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> @@ -2936,3 +2936,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> @@ -2940,2 +2940,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> @@ -2943,3 +2943,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> @@ -2947,3 +2947,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> @@ -2951,3 +2951,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> @@ -2967,3 +2967,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> @@ -2971,3 +2971,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> @@ -2975,2 +2975,2 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> @@ -2978,3 +2978,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> @@ -2982,3 +2982,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> @@ -2986,3 +2986,3 @@ -; AVX2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; AVX2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> @@ -3898,3 +3898,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i64 = trunc <20 x i64> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i64 = trunc <24 x i64> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i64 = trunc <28 x i64> undef to <28 x i1> @@ -3902,3 +3902,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i64 = trunc <40 x i64> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i64 = trunc <48 x i64> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i64 = trunc <56 x i64> undef to <56 x i1> @@ -3906,2 +3906,2 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i64 = trunc <80 x i64> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i64 = trunc <112 x i64> undef to <112 x i1> @@ -3909,3 +3909,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i64 = trunc <160 x i64> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i64 = trunc <192 x i64> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i64 = trunc <224 x i64> undef to <224 x i1> @@ -3913,3 +3913,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i64 = trunc <320 x i64> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i64 = trunc <384 x i64> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i64 = trunc <448 x i64> undef to <448 x i1> @@ -3917,3 +3917,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i64 = trunc <640 x i64> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i64 = trunc <768 x i64> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i64 = trunc <896 x i64> undef to <896 x i1> @@ -3933,3 +3933,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i32 = trunc <20 x i32> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i32 = trunc <24 x i32> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i32 = trunc <28 x i32> undef to <28 x i1> @@ -3937,3 +3937,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i32 = trunc <40 x i32> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i32 = trunc <48 x i32> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i32 = trunc <56 x i32> undef to <56 x i1> @@ -3941,2 +3941,2 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i32 = trunc <80 x i32> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i32 = trunc <112 x i32> undef to <112 x i1> @@ -3944,3 +3944,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i32 = trunc <160 x i32> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i32 = trunc <192 x i32> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i32 = trunc <224 x i32> undef to <224 x i1> @@ -3948,3 +3948,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i32 = trunc <320 x i32> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i32 = trunc <384 x i32> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i32 = trunc <448 x i32> undef to <448 x i1> @@ -3952,3 +3952,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i32 = trunc <640 x i32> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i32 = trunc <768 x i32> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i32 = trunc <896 x i32> undef to <896 x i1> @@ -3968,3 +3968,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V20i16 = trunc <20 x i16> undef to <20 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V24i16 = trunc <24 x i16> undef to <24 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: %V28i16 = trunc <28 x i16> undef to <28 x i1> @@ -3972,3 +3972,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 119 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V40i16 = trunc <40 x i16> undef to <40 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V48i16 = trunc <48 x i16> undef to <48 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 117 for instruction: %V56i16 = trunc <56 x i16> undef to <56 x i1> @@ -3976,2 +3976,2 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 167 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 234 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 165 for instruction: %V80i16 = trunc <80 x i16> undef to <80 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 231 for instruction: %V112i16 = trunc <112 x i16> undef to <112 x i1> @@ -3979,3 +3979,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 335 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 402 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 469 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 330 for instruction: %V160i16 = trunc <160 x i16> undef to <160 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V192i16 = trunc <192 x i16> undef to <192 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 462 for instruction: %V224i16 = trunc <224 x i16> undef to <224 x i1> @@ -3983,3 +3983,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 670 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 804 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 938 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 660 for instruction: %V320i16 = trunc <320 x i16> undef to <320 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 792 for instruction: %V384i16 = trunc <384 x i16> undef to <384 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 924 for instruction: %V448i16 = trunc <448 x i16> undef to <448 x i1> @@ -3987,3 +3987,3 @@ -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1340 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1608 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1876 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1320 for instruction: %V640i16 = trunc <640 x i16> undef to <640 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1584 for instruction: %V768i16 = trunc <768 x i16> undef to <768 x i1> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1848 for instruction: %V896i16 = trunc <896 x i16> undef to <896 x i1> diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vector_ptr_load_store.ll @@ -124 +124 @@ -; CHECK: LV: Selecting VF: 1 +; CHECK: LV: Selecting VF: 4 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-reorder-reuse.ll @@ -7,26 +7,14 @@ -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[ARR:%.*]], i64 1 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[A1:%.*]] -; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[TMP0]], [[A2:%.*]] -; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[TMP0]], [[A3:%.*]] -; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[TMP0]], [[A4:%.*]] -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[TMP0]], [[A5:%.*]] -; CHECK-NEXT: [[ADD10:%.*]] = add i32 [[TMP0]], [[A6:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARR]], align 4 -; CHECK-NEXT: [[ADD12:%.*]] = add i32 [[TMP1]], [[A7:%.*]] -; CHECK-NEXT: [[ADD14:%.*]] = add i32 [[TMP1]], [[A8:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[ADD]], [[ADD2]] -; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[ADD]], i32 [[ADD2]] -; CHECK-NEXT: [[CMP15:%.*]] = icmp ult i32 [[COND]], [[ADD4]] -; CHECK-NEXT: [[COND19:%.*]] = select i1 [[CMP15]], i32 [[COND]], i32 [[ADD4]] -; CHECK-NEXT: [[CMP20:%.*]] = icmp ult i32 [[COND19]], [[ADD6]] -; CHECK-NEXT: [[COND24:%.*]] = select i1 [[CMP20]], i32 [[COND19]], i32 [[ADD6]] -; CHECK-NEXT: [[CMP25:%.*]] = icmp ult i32 [[COND24]], [[ADD8]] -; CHECK-NEXT: [[COND29:%.*]] = select i1 [[CMP25]], i32 [[COND24]], i32 [[ADD8]] -; CHECK-NEXT: [[CMP30:%.*]] = icmp ult i32 [[COND29]], [[ADD10]] -; CHECK-NEXT: [[COND34:%.*]] = select i1 [[CMP30]], i32 [[COND29]], i32 [[ADD10]] -; CHECK-NEXT: [[CMP35:%.*]] = icmp ult i32 [[COND34]], [[ADD12]] -; CHECK-NEXT: [[COND39:%.*]] = select i1 [[CMP35]], i32 [[COND34]], i32 [[ADD12]] -; CHECK-NEXT: [[CMP40:%.*]] = icmp ult i32 [[COND39]], [[ADD14]] -; CHECK-NEXT: [[COND44:%.*]] = select i1 [[CMP40]], i32 [[COND39]], i32 [[ADD14]] -; CHECK-NEXT: ret i32 [[COND44]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[ARR:%.*]] to <2 x i32>* +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[A2:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[A1:%.*]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 [[A3:%.*]], i32 2 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i32> [[TMP4]], i32 [[A4:%.*]], i32 3 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> [[TMP5]], i32 [[A5:%.*]], i32 4 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i32> [[TMP6]], i32 [[A6:%.*]], i32 5 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x i32> [[TMP7]], i32 [[A7:%.*]], i32 6 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[A8:%.*]], i32 7 +; CHECK-NEXT: [[TMP10:%.*]] = add <8 x i32> [[SHUFFLE]], [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> [[TMP10]]) +; CHECK-NEXT: ret i32 [[TMP11]]