diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -887,6 +887,7 @@ SK_Splice ///< Concatenates elements from the first input vector ///< with elements of the second input vector. Returning ///< a vector of the same type as the input vectors. + ///< Index indicates start offset in first input vector. }; /// Additional information about an operand's possible values. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1244,6 +1244,11 @@ SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands); + if (Shuffle->isSplice(SubIndex)) + return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, + Shuffle->getShuffleMask(), CostKind, + SubIndex, nullptr, Operands); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Shuffle->getShuffleMask(), CostKind, 0, nullptr, Operands); diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -902,6 +902,7 @@ // ShuffleVectorInst::isSingleSourceMask). any_of(Mask, [Limit](int I) { return I >= Limit; })) return Kind; + int Index; switch (Kind) { case TTI::SK_PermuteSingleSrc: if (ShuffleVectorInst::isReverseMask(Mask)) @@ -914,6 +915,8 @@ return TTI::SK_Select; if (ShuffleVectorInst::isTransposeMask(Mask)) return TTI::SK_Transpose; + if (ShuffleVectorInst::isSpliceMask(Mask, Index)) + return TTI::SK_Splice; break; case TTI::SK_Select: case TTI::SK_Reverse: diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2293,6 +2293,26 @@ return !changesLength() && isTransposeMask(ShuffleMask); } + /// Return true if this shuffle mask is a splice mask, concatenating the two + /// inputs together and then extracts an original width vector starting from + /// the splice index. + /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> + static bool isSpliceMask(ArrayRef Mask, int &Index); + static bool isSpliceMask(const Constant *Mask, int &Index) { + assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + SmallVector MaskAsInts; + getShuffleMask(Mask, MaskAsInts); + return isSpliceMask(MaskAsInts, Index); + } + + /// Return true if this shuffle splices two inputs without changing the length + /// of the vectors. This operation concatenates the two inputs together and + /// then extracts an original width vector starting from the splice index. + /// Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> + bool isSplice(int &Index) const { + return !changesLength() && isSpliceMask(ShuffleMask, Index); + } + /// Return true if this shuffle mask is an extract subvector mask. /// A valid extract subvector mask returns a smaller vector from a single /// source operand. The base extraction index is returned as well. diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2287,6 +2287,37 @@ return true; } +bool ShuffleVectorInst::isSpliceMask(ArrayRef Mask, int &Index) { + // Example: shufflevector <4 x n> A, <4 x n> B, <1,2,3,4> + int StartIndex = -1; + for (int I = 0, E = Mask.size(); I != E; ++I) { + int MaskEltVal = Mask[I]; + if (MaskEltVal == -1) + continue; + + if (StartIndex == -1) { + // Don't support a StartIndex that begins in the second input, or if the + // first non-undef index would access below the StartIndex. + if (MaskEltVal < I || E <= (MaskEltVal - I)) + return false; + + StartIndex = MaskEltVal - I; + continue; + } + + // Splice is sequential starting from StartIndex. + if (MaskEltVal != (StartIndex + I)) + return false; + } + + if (StartIndex == -1) + return false; + + // NOTE: This accepts StartIndex == 0 (COPY). + Index = StartIndex; + return true; +} + bool ShuffleVectorInst::isExtractSubvectorMask(ArrayRef Mask, int NumSrcElts, int &Index) { // Must extract from a single source. diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1067,8 +1067,7 @@ Kind = improveShuffleKindFromMask(Kind, Mask); // Treat Transpose as 2-op shuffles - there's no difference in lowering. - // TODO: Treat Splice as 2-op shuffles - improve this in the future. - if (Kind == TTI::SK_Transpose || Kind == TTI::SK_Splice) + if (Kind == TTI::SK_Transpose) Kind = TTI::SK_PermuteTwoSrc; // For Broadcasts we are splatting the first element from the first input @@ -1158,6 +1157,11 @@ {TTI::SK_Reverse, MVT::v4i8, 3}, // punpck/pshuflw/packus {TTI::SK_Reverse, MVT::v2i8, 1}, // punpck + {TTI::SK_Splice, MVT::v4i16, 2}, // punpck+psrldq + {TTI::SK_Splice, MVT::v2i16, 2}, // punpck+psrldq + {TTI::SK_Splice, MVT::v4i8, 2}, // punpck+psrldq + {TTI::SK_Splice, MVT::v2i8, 2}, // punpck+psrldq + {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, // punpck/pshuflw {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, // punpck/pshuflw {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, // punpck/pshuflw @@ -1312,6 +1316,10 @@ {TTI::SK_Select, MVT::v32i16, 1}, // vblendmw {TTI::SK_Select, MVT::v64i8, 1}, // vblendmb + + {TTI::SK_Splice, MVT::v32i16, 2}, // vshufi64x2 + palignr + {TTI::SK_Splice, MVT::v32f16, 2}, // vshufi64x2 + palignr + {TTI::SK_Splice, MVT::v64i8, 2}, // vshufi64x2 + palignr }; if (ST->hasBWI()) @@ -1336,6 +1344,18 @@ {TTI::SK_Reverse, MVT::v32f16, 7}, // per mca {TTI::SK_Reverse, MVT::v64i8, 7}, // per mca + {TTI::SK_Splice, MVT::v8f64, 1}, // vpalignd + {TTI::SK_Splice, MVT::v4f64, 1}, // vpalignd + {TTI::SK_Splice, MVT::v16f32, 1}, // vpalignd + {TTI::SK_Splice, MVT::v8f32, 1}, // vpalignd + {TTI::SK_Splice, MVT::v8i64, 1}, // vpalignd + {TTI::SK_Splice, MVT::v4i64, 1}, // vpalignd + {TTI::SK_Splice, MVT::v16i32, 1}, // vpalignd + {TTI::SK_Splice, MVT::v8i32, 1}, // vpalignd + {TTI::SK_Splice, MVT::v32i16, 4}, // split + palignr + {TTI::SK_Splice, MVT::v32f16, 4}, // split + palignr + {TTI::SK_Splice, MVT::v64i8, 4}, // split + palignr + {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, // vpermpd {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // vpermpd @@ -1404,7 +1424,13 @@ {TTI::SK_Select, MVT::v16i16, 1}, // vpblendvb {TTI::SK_Select, MVT::v16f16, 1}, // vpblendvb - {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb + {TTI::SK_Select, MVT::v32i8, 1}, // vpblendvb + + {TTI::SK_Splice, MVT::v8i32, 2}, // vperm2i128 + vpalignr + {TTI::SK_Splice, MVT::v8f32, 2}, // vperm2i128 + vpalignr + {TTI::SK_Splice, MVT::v16i16, 2}, // vperm2i128 + vpalignr + {TTI::SK_Splice, MVT::v16f16, 2}, // vperm2i128 + vpalignr + {TTI::SK_Splice, MVT::v32i8, 2}, // vperm2i128 + vpalignr {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, // vpermpd {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, // vpermps @@ -1483,6 +1509,14 @@ {TTI::SK_Select, MVT::v16f16, 3}, // vpand + vpandn + vpor {TTI::SK_Select, MVT::v32i8, 3}, // vpand + vpandn + vpor + {TTI::SK_Splice, MVT::v4i64, 2}, // vperm2f128 + shufpd + {TTI::SK_Splice, MVT::v4f64, 2}, // vperm2f128 + shufpd + {TTI::SK_Splice, MVT::v8i32, 4}, // 2*vperm2f128 + 2*vshufps + {TTI::SK_Splice, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps + {TTI::SK_Splice, MVT::v16i16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128 + {TTI::SK_Splice, MVT::v16f16, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128 + {TTI::SK_Splice, MVT::v32i8, 5}, // 2*vperm2f128 + 2*vpalignr + vinsertf128 + {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, // vperm2f128 + vshufpd {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, // vperm2f128 + vshufpd {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, // 2*vperm2f128 + 2*vshufps @@ -1537,6 +1571,12 @@ {TTI::SK_Select, MVT::v8f16, 3}, // 2*pshufb + por {TTI::SK_Select, MVT::v16i8, 3}, // 2*pshufb + por + {TTI::SK_Splice, MVT::v4i32, 1}, // palignr + {TTI::SK_Splice, MVT::v4f32, 1}, // palignr + {TTI::SK_Splice, MVT::v8i16, 1}, // palignr + {TTI::SK_Splice, MVT::v8f16, 1}, // palignr + {TTI::SK_Splice, MVT::v16i8, 1}, // palignr + {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, // pshufb {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, // pshufb {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, // pshufb @@ -1573,6 +1613,13 @@ {TTI::SK_Select, MVT::v8f16, 3}, // pand + pandn + por {TTI::SK_Select, MVT::v16i8, 3}, // pand + pandn + por + {TTI::SK_Splice, MVT::v2i64, 1}, // shufpd + {TTI::SK_Splice, MVT::v2f64, 1}, // shufpd + {TTI::SK_Splice, MVT::v4i32, 2}, // 2*{unpck,movsd,pshufd} + {TTI::SK_Splice, MVT::v8i16, 3}, // psrldq + psrlldq + por + {TTI::SK_Splice, MVT::v8f16, 3}, // psrldq + psrlldq + por + {TTI::SK_Splice, MVT::v16i8, 3}, // psrldq + psrlldq + por + {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, // shufpd {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, // pshufd {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, // pshufd @@ -1615,6 +1662,7 @@ { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps + { TTI::SK_Splice, MVT::v4f32, 2 }, // 2*shufps { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps }; diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-splice.ll b/llvm/test/Analysis/CostModel/X86/shuffle-splice.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-splice.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-splice.ll @@ -19,14 +19,14 @@ define void @test_vXf64(<2 x double> %a128, <2 x double> %b128, <4 x double> %a256, <4 x double> %b256, <8 x double> %a512, <8 x double> %b512) { ; SSE-LABEL: 'test_vXf64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %a128, <2 x double> %b128, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %a256, <4 x double> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x double> %a512, <8 x double> %b512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' @@ -44,14 +44,14 @@ define void @test_vXi64(<2 x i64> %a128, <2 x i64> %b128, <4 x i64> %a256, <4 x i64> %b256, <8 x i64> %a512, <8 x i64> %b512) { ; SSE-LABEL: 'test_vXi64' ; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %a128, <2 x i64> %b128, <2 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %a256, <4 x i64> %b256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <8 x i64> %a512, <8 x i64> %b512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' @@ -67,25 +67,39 @@ } define void @test_vXf32(<2 x float> %a64, <2 x float> %b64, <4 x float> %a128, <4 x float> %b128, <8 x float> %a256, <8 x float> %b256, <16 x float> %a512, <16 x float> %b512) { -; SSE-LABEL: 'test_vXf32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXf32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXf32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_vXf32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' @@ -103,25 +117,39 @@ } define void @test_vXi32(<2 x i32> %a64, <2 x i32> %b64, <4 x i32> %a128, <4 x i32> %b128, <8 x i32> %a256, <8 x i32> %b256, <16 x i32> %a512, <16 x i32> %b512) { -; SSE-LABEL: 'test_vXi32' -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; SSE2-LABEL: 'test_vXi32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSSE3-LABEL: 'test_vXi32' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; SSE42-LABEL: 'test_vXi32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %a64, <2 x i32> %b64, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %a128, <4 x i32> %b128, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %a256, <8 x i32> %b256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <16 x i32> %a512, <16 x i32> %b512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' @@ -142,63 +170,63 @@ ; SSE2-LABEL: 'test_vXi16' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 224 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi16' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <2 x i16> %a32, <2 x i16> %b32, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <4 x i16> %a64, <4 x i16> %b64, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %a128, <8 x i16> %b128, <8 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %a256, <16 x i16> %b256, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %a512, <32 x i16> %b512, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -214,72 +242,72 @@ define void @test_vXi8(<2 x i8> %a16, <2 x i8> %b16, <4 x i8> %a32, <4 x i8> %b32, <8 x i8> %a64, <8 x i8> %b64, <16 x i8> %a128, <16 x i8> %b128, <32 x i8> %a256, <32 x i8> %b256, <64 x i8> %a512, <64 x i8> %b512) { ; SSE2-LABEL: 'test_vXi8' ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 364 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi8' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi8' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VBMI-LABEL: 'test_vXi8' -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> -; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16 = shufflevector <2 x i8> %a16, <2 x i8> %b16, <2 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32 = shufflevector <4 x i8> %a32, <4 x i8> %b32, <4 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <8 x i8> %a64, <8 x i8> %b64, <8 x i32> +; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %a128, <16 x i8> %b128, <16 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %a256, <32 x i8> %b256, <32 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %a512, <64 x i8> %b512, <64 x i32> ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void