diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -1074,15 +1074,24 @@ auto *VecTy = cast(U->getType()); auto *VecSrcTy = cast(U->getOperand(0)->getType()); - int SubIndex; + int NumSubElts, SubIndex; if (Shuffle->changesLength()) { + // Treat a 'subvector widening' as a free shuffle. + if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) + return 0; + if (Shuffle->isExtractSubvectorMask(SubIndex)) return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, Shuffle->getShuffleMask(), SubIndex, VecTy); - // TODO: Identify and add costs for insert subvector, etc. + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), + SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; } @@ -1109,7 +1118,11 @@ return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); - // TODO: Identify and add costs for insert subvector, etc. + if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) + return TargetTTI->getShuffleCost( + TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), SubIndex, + FixedVectorType::get(VecTy->getScalarType(), NumSubElts)); + return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Shuffle->getShuffleMask(), 0, nullptr); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2306,6 +2306,36 @@ return isExtractSubvectorMask(ShuffleMask, NumSrcElts, Index); } + /// Return true if this shuffle mask is an insert subvector mask. + /// A valid insert subvector mask inserts the lowest elements of a second + /// source operand into an in-place first source operand operand. + /// Both the sub vector width and the insertion index is returned. + static bool isInsertSubvectorMask(ArrayRef Mask, int NumSrcElts, + int &NumSubElts, int &Index); + static bool isInsertSubvectorMask(const Constant *Mask, int NumSrcElts, + int &NumSubElts, int &Index) { + assert(Mask->getType()->isVectorTy() && "Shuffle needs vector constant."); + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa(Mask->getType())) + return false; + SmallVector MaskAsInts; + getShuffleMask(Mask, MaskAsInts); + return isInsertSubvectorMask(MaskAsInts, NumSrcElts, NumSubElts, Index); + } + + /// Return true if this shuffle mask is an insert subvector mask. + bool isInsertSubvectorMask(int &NumSubElts, int &Index) const { + // Not possible to express a shuffle mask for a scalable vector for this + // case. + if (isa(getType())) + return false; + + int NumSrcElts = + cast(Op<0>()->getType())->getNumElements(); + return isInsertSubvectorMask(ShuffleMask, NumSrcElts, NumSubElts, Index); + } + /// Change values in a shuffle permute mask assuming the two vector operands /// of length InVecNumElts have swapped position. static void commuteShuffleMask(MutableArrayRef Mask, diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -2259,6 +2259,80 @@ return false; } +bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef Mask, + int NumSrcElts, int &NumSubElts, + int &Index) { + int NumMaskElts = Mask.size(); + + // Don't try to match if we're shuffling to a smaller size. + if (NumMaskElts < NumSrcElts) + return false; + + // TODO: We don't recognize self-insertion/widening. + if (isSingleSourceMaskImpl(Mask, NumSrcElts)) + return false; + + // Determine which mask elements are attributed to which source. + APInt UndefElts = APInt::getNullValue(NumMaskElts); + APInt Src0Elts = APInt::getNullValue(NumMaskElts); + APInt Src1Elts = APInt::getNullValue(NumMaskElts); + bool Src0Identity = true; + bool Src1Identity = true; + + for (int i = 0; i != NumMaskElts; ++i) { + int M = Mask[i]; + if (M < 0) { + UndefElts.setBit(i); + continue; + } + if (M < NumSrcElts) { + Src0Elts.setBit(i); + Src0Identity &= (M == i); + continue; + } + Src1Elts.setBit(i); + Src1Identity &= (M == (i + NumSrcElts)); + continue; + } + assert((Src0Elts | Src1Elts | UndefElts).isAllOnesValue() && + "unknown shuffle elements"); + assert(!Src0Elts.isNullValue() && !Src1Elts.isNullValue() && + "2-source shuffle not found"); + + // Determine lo/hi span ranges. + // TODO: How should we handle undefs at the start of subvector insertions? + int Src0Lo = Src0Elts.countTrailingZeros(); + int Src1Lo = Src1Elts.countTrailingZeros(); + int Src0Hi = NumMaskElts - Src0Elts.countLeadingZeros(); + int Src1Hi = NumMaskElts - Src1Elts.countLeadingZeros(); + + // If src0 is in place, see if the src1 elements is inplace within its own + // span. + if (Src0Identity) { + int NumSub1Elts = Src1Hi - Src1Lo; + ArrayRef Sub1Mask = Mask.slice(Src1Lo, NumSub1Elts); + if (isIdentityMaskImpl(Sub1Mask, NumSrcElts)) { + NumSubElts = NumSub1Elts; + Index = Src1Lo; + return true; + } + } + + // If src1 is in place, see if the src0 elements is inplace within its own + // span. + if (Src1Identity) { + int NumSub0Elts = Src0Hi - Src0Lo; + ArrayRef Sub0Mask = Mask.slice(Src0Lo, NumSub0Elts); + if (isIdentityMaskImpl(Sub0Mask, NumSrcElts)) { + NumSubElts = NumSub0Elts; + Index = Src0Lo; + return true; + } + } + + return false; +} + bool ShuffleVectorInst::isIdentityWithPadding() const { if (isa(Op<2>())) return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -1121,6 +1121,9 @@ if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) return SubLT.first; } + + // If the insertion isn't aligned, treat it like a 2-op shuffles. + Kind = TTI::SK_PermuteTwoSrc; } // Handle some common (illegal) sub-vector types as they are often very cheap diff --git a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll --- a/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/shufflevector.ll @@ -53,8 +53,8 @@ } ; GCN-LABEL: 'shufflevector_xxx' -; TPT: estimated cost of -1 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> -; CS: estimated cost of 1 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> +; TPT: estimated cost of 0 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> +; CS: estimated cost of 0 for {{.*}} shufflevector <2 x i8> %vec, <2 x i8> undef, <4 x i32> ; Should not assert define amdgpu_kernel void @shufflevector_xxx(<4 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %vaddr) { %vec = load <2 x i8>, <2 x i8> addrspace(1)* %vaddr diff --git a/llvm/test/Analysis/CostModel/ARM/shuffle.ll b/llvm/test/Analysis/CostModel/ARM/shuffle.ll --- a/llvm/test/Analysis/CostModel/ARM/shuffle.ll +++ b/llvm/test/Analysis/CostModel/ARM/shuffle.ll @@ -171,39 +171,39 @@ define void @concat() { ; CHECK-MVE-LABEL: 'concat' -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> -; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> +; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> ; CHECK-MVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-NEON-LABEL: 'concat' -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> -; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i8 = shufflevector <4 x i8> undef, <4 x i8> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i8 = shufflevector <8 x i8> undef, <8 x i8> undef, <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i16 = shufflevector <2 x i16> undef, <2 x i16> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i16 = shufflevector <4 x i16> undef, <4 x i16> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16i16 = shufflevector <8 x i16> undef, <8 x i16> undef, <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i32 = shufflevector <2 x i32> undef, <2 x i32> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8i32 = shufflevector <4 x i32> undef, <4 x i32> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4i64 = shufflevector <2 x i64> undef, <2 x i64> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f16 = shufflevector <2 x half> undef, <2 x half> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f16 = shufflevector <4 x half> undef, <4 x half> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f16 = shufflevector <8 x half> undef, <8 x half> undef, <16 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f32 = shufflevector <2 x float> undef, <2 x float> undef, <4 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f32 = shufflevector <4 x float> undef, <4 x float> undef, <8 x i32> +; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = shufflevector <2 x double> undef, <2 x double> undef, <4 x i32> ; CHECK-NEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v4i8 = shufflevector <2 x i8> undef, <2 x i8> undef, <4 x i32> diff --git a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll --- a/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll +++ b/llvm/test/Analysis/CostModel/X86/shuffle-insert_subvector.ll @@ -18,37 +18,37 @@ define void @test_vXf64(<2 x double> %src128, <4 x double> %src256, <8 x double> %src512) { ; SSE-LABEL: 'test_vXf64' -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXf64' -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x double> %src512, <8 x double> %src256_512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf64' -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x double> %src128, <2 x double> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x double> %src128, <2 x double> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x double> %src256, <4 x double> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x double> %src256, <4 x double> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x double> %src512, <8 x double> %src128_512, <8 x i32> @@ -76,37 +76,37 @@ define void @test_vXi64(<2 x i64> %src128, <4 x i64> %src256, <8 x i64> %src512) { ; SSE-LABEL: 'test_vXi64' -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi64' -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_23 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_67 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <8 x i64> %src512, <8 x i64> %src256_512, <8 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi64' -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <2 x i64> %src128, <2 x i64> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <2 x i64> %src128, <2 x i64> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <4 x i64> %src256, <4 x i64> undef, <8 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_23 = shufflevector <4 x i64> %src256, <4 x i64> %src128_256, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_01 = shufflevector <8 x i64> %src512, <8 x i64> %src128_512, <8 x i32> @@ -134,167 +134,167 @@ define void @test_vXf32(<2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) { ; SSE2-LABEL: 'test_vXf32' -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXf32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXf32' -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXf32' -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXf32' -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x float> %src256, <8 x float> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x float> %src512, <16 x float> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x float> %src512, <16 x float> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXf32' -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x float> %src64, <2 x float> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x float> %src64, <2 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x float> %src64, <2 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x float> %src128, <4 x float> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x float> %src128, <4 x float> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x float> %src256, <8 x float> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x float> %src128, <4 x float> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x float> %src256, <8 x float> %src64_256, <8 x i32> @@ -355,167 +355,167 @@ define void @test_vXi32(<2 x i32> %src64, <4 x i32> %src128, <8 x i32> %src256, <16 x i32> %src512) { ; SSE2-LABEL: 'test_vXi32' -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi32' -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi32' -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi32' -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi32' -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_23 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_45 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_67 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_4567 = shufflevector <8 x i32> %src256, <8 x i32> %src128_256, <8 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_23 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_45 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_67 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89 = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_AB = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CD = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_EF = shufflevector <16 x i32> %src512, <16 x i32> %src64_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_0123 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_4567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89AB = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_CDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512_01234567 = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512_89ABCDEF = shufflevector <16 x i32> %src512, <16 x i32> %src128_512, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512-LABEL: 'test_vXi32' -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> -; AVX512-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <2 x i32> %src64, <2 x i32> undef, <4 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <2 x i32> %src64, <2 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <2 x i32> %src64, <2 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <4 x i32> %src128, <4 x i32> undef, <8 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <4 x i32> %src128, <4 x i32> undef, <16 x i32> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <8 x i32> %src256, <8 x i32> undef, <16 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_23 = shufflevector <4 x i32> %src128, <4 x i32> %src64_128, <4 x i32> ; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01 = shufflevector <8 x i32> %src256, <8 x i32> %src64_256, <8 x i32> @@ -576,16 +576,16 @@ define void @test_vXi16(<2 x i16> %src32, <4 x i16> %src64, <8 x i16> %src128, <16x i16> %src256, <32 x i16> %src512) { ; SSE2-LABEL: 'test_vXi16' -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -598,29 +598,29 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi16' -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -633,29 +633,29 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi16' -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -668,29 +668,29 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX1-LABEL: 'test_vXi16' -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -703,29 +703,29 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX2-LABEL: 'test_vXi16' -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -738,29 +738,29 @@ ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi16' -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -773,29 +773,29 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi16' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -808,29 +808,29 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi16' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <2 x i16> %src32, <2 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <2 x i16> %src32, <2 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <2 x i16> %src32, <2 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <4 x i16> %src64, <4 x i16> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <4 x i16> %src64, <4 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <4 x i16> %src64, <4 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <8 x i16> %src128, <8 x i16> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <8 x i16> %src128, <8 x i16> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <16 x i16> %src256, <16 x i16> undef, <32 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V64_23 = shufflevector <4 x i16> %src64, <4 x i16> %src32_64, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128_01 = shufflevector <8 x i16> %src128, <8 x i16> %src32_128, <8 x i32> @@ -843,16 +843,16 @@ ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_23 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_45 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_67 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89 = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_AB = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CD = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_EF = shufflevector <16 x i16> %src256, <16 x i16> %src64_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_0123 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_4567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89AB = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_CDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_01234567 = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256_89ABCDEF = shufflevector <16 x i16> %src256, <16 x i16> %src128_256, <16 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %src32_64 = shufflevector <2 x i16> %src32, <2 x i16> undef, <4 x i32> @@ -896,21 +896,21 @@ define void @test_vXi8(<2 x i8> %src16, <4 x i8> %src32, <8 x i8> %src64, <16x i8> %src128, <32 x i8> %src256, <64 x i8> %src512) { ; SSE2-LABEL: 'test_vXi8' -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE2-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -936,21 +936,21 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSSE3-LABEL: 'test_vXi8' -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSSE3-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -976,21 +976,21 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; SSE42-LABEL: 'test_vXi8' -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; SSE42-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1016,21 +1016,21 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX-LABEL: 'test_vXi8' -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1056,21 +1056,21 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512F-LABEL: 'test_vXi8' -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512F-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512F-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1096,21 +1096,21 @@ ; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512BW-LABEL: 'test_vXi8' -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512BW-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32> @@ -1136,21 +1136,21 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; AVX512VMBI-LABEL: 'test_vXi8' -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> -; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of -1 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_32 = shufflevector <2 x i8> %src16, <2 x i8> undef, <4 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_64 = shufflevector <2 x i8> %src16, <2 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_128 = shufflevector <2 x i8> %src16, <2 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_256 = shufflevector <2 x i8> %src16, <2 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src16_512 = shufflevector <2 x i8> %src16, <2 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_64 = shufflevector <4 x i8> %src32, <4 x i8> undef, <8 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_128 = shufflevector <4 x i8> %src32, <4 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_256 = shufflevector <4 x i8> %src32, <4 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src32_512 = shufflevector <4 x i8> %src32, <4 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_128 = shufflevector <8 x i8> %src64, <8 x i8> undef, <16 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_256 = shufflevector <8 x i8> %src64, <8 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src64_512 = shufflevector <8 x i8> %src64, <8 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_256 = shufflevector <16 x i8> %src128, <16 x i8> undef, <32 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src128_512 = shufflevector <16 x i8> %src128, <16 x i8> undef, <64 x i32> +; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %src256_512 = shufflevector <32 x i8> %src256, <32 x i8> undef, <64 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32_01 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V32_23 = shufflevector <4 x i8> %src32, <4 x i8> %src16_32, <4 x i32> ; AVX512VMBI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64_01 = shufflevector <8 x i8> %src64, <8 x i8> %src32_64, <8 x i32>