diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -145,6 +145,10 @@
   InstructionCost getScalarizationOverhead(VectorType *Ty,
                                            const APInt &DemandedElts,
                                            bool Insert, bool Extract);
+  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
+                                            int VF,
+                                            const APInt &DemandedReplicatedElts,
+                                            TTI::TargetCostKind CostKind);
   InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                   MaybeAlign Alignment, unsigned AddressSpace,
                                   TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3621,6 +3621,68 @@
   return Cost;
 }
 
+/// Estimate the cost of a replication shuffle that turns a \p VF element
+/// vector of \p EltTy into a vector of VF * ReplicationFactor elements.
+///
+/// Only AVX512 subtargets and 32-bit or 64-bit wide elements are handled
+/// here; everything else is forwarded to the base-class implementation.
+/// \p DemandedReplicatedElts is the demanded-elements mask of the result.
+InstructionCost X86TTIImpl::getReplicationShuffleCost(
+    Type *EltTy, int ReplicationFactor, int VF,
+    const APInt &DemandedReplicatedElts, TTI::TargetCostKind CostKind) {
+  // Hand the query over to the base class when it is not modelled here.
+  auto UseGenericCost = [&]() {
+    return BaseT::getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
+                                            DemandedReplicatedElts, CostKind);
+  };
+
+  const unsigned EltTyBits = DL.getTypeSizeInBits(EltTy);
+
+  // For now, only AVX512 is dealt with, and only for the 32-bit and 64-bit
+  // element widths (AVX512F).
+  if (!ST->hasAVX512())
+    return UseGenericCost();
+  if (EltTyBits != 32 && EltTyBits != 64)
+    return UseGenericCost();
+
+  // Source vector and the (VF * ReplicationFactor)-element result vector.
+  auto *SrcTy = FixedVectorType::get(EltTy, VF);
+  int NumResultElts = VF * ReplicationFactor;
+  auto *ResultTy = FixedVectorType::get(EltTy, NumResultElts);
+
+  // Legalize both types; if either one does not legalize into a vector type,
+  // leave the query to the base class.
+  MVT LegalSrcVT = TLI->getTypeLegalizationCost(DL, SrcTy).second;
+  MVT LegalResultVT = TLI->getTypeLegalizationCost(DL, ResultTy).second;
+  if (!(LegalSrcVT.isVector() && LegalResultVT.isVector()))
+    return UseGenericCost();
+
+  assert(LegalSrcVT.getScalarSizeInBits() == EltTyBits &&
+         LegalSrcVT.getScalarType() == LegalResultVT.getScalarType() &&
+         "We expect that the legalization doesn't affect the element width, "
+         "doesn't coalesce/split elements.");
+
+  // Number of legal vectors needed to hold every element of the result.
+  unsigned EltsPerLegalVec = LegalResultVT.getVectorNumElements();
+  unsigned NumLegalVecs =
+      divideCeil(ResultTy->getNumElements(), EltsPerLegalVec);
+  auto *LegalVecTy = FixedVectorType::get(EltTy, EltsPerLegalVec);
+
+  // Widen the demanded-elements mask to cover all of those vectors, then
+  // scale it down to NumLegalVecs bits and count the bits that are set.
+  APInt PaddedDemandedElts =
+      DemandedReplicatedElts.zextOrSelf(NumLegalVecs * EltsPerLegalVec);
+  APInt DemandedLegalVecs =
+      APIntOps::ScaleBitMask(PaddedDemandedElts, NumLegalVecs);
+  unsigned NumDemandedLegalVecs = DemandedLegalVecs.countPopulation();
+
+  // One single-source permute is charged per counted legal vector.
+  InstructionCost PerVecShuffleCost =
+      getShuffleCost(TTI::SK_PermuteSingleSrc, LegalVecTy,
+                     /*Mask=*/None, /*Index=*/0, /*SubTp=*/nullptr);
+  return NumDemandedLegalVecs * PerVecShuffleCost;
+}
+
 InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                             MaybeAlign Alignment,
                                             unsigned AddressSpace,
diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll
--- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll
+++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i32.ll
@@ -59,11 +59,11 @@
 ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; AVX512-LABEL: 'replication_i32_stride2'
-; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32>
-; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %vf8 =
shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 136 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <4 x i32> @@ -124,11 +124,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i32_stride3' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 176 for 
instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <96 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <6 x i32> @@ -189,11 +189,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i32_stride4' -; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <128 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <8 x i32> @@ -254,11 +254,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i32_stride5' -; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 256 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <20 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <40 x i32> +; AVX512-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <80 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <160 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <10 x i32> @@ -319,11 +319,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i32_stride6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 296 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <192 x i32> ; AVX512-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <12 x i32> @@ -384,11 +384,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i32_stride7' -; AVX512-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 82 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 168 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 336 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <28 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <56 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <112 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <224 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <14 x i32> @@ -449,11 +449,11 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 
'replication_i32_stride8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 376 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i32> undef, <4 x i32> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i32> undef, <8 x i32> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i32> undef, <16 x i32> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf32 = shufflevector <32 x i32> undef, <32 x i32> poison, <256 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i32> undef, <2 x i32> poison, <16 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-replication-i64.ll @@ -53,10 +53,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 
'replication_i64_stride2' -; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> @@ -110,10 +110,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride3' -; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 
x i64> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <48 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <6 x i32> @@ -167,10 +167,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride4' -; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x 
i64> poison, <64 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <8 x i32> @@ -224,10 +224,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride5' -; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 148 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <20 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <40 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <80 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <10 x i32> @@ -281,10 +281,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride6' -; AVX512-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %vf4 = shufflevector <4 x 
i64> undef, <4 x i64> poison, <24 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 172 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <24 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <48 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <96 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <12 x i32> @@ -338,10 +338,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride7' -; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = 
shufflevector <4 x i64> undef, <4 x i64> poison, <28 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <56 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <112 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <14 x i32> @@ -395,10 +395,10 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'replication_i64_stride8' -; AVX512-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <32 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <64 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <128 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <16 x i32>