Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h +++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h @@ -641,7 +641,9 @@ enum ShuffleKind { SK_Broadcast, ///< Broadcast element 0 to all other elements. SK_Reverse, ///< Reverse the order of the vector. - SK_Alternate, ///< Choose alternate elements from vector. + SK_Select, ///< Selects elements from the corresponding lane of + ///< either source operand. This is equivalent to a + ///< vector select with a constant condition operand. SK_Transpose, ///< Transpose two vectors. SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset. Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h +++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h @@ -554,7 +554,7 @@ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { switch (Kind) { - case TTI::SK_Alternate: + case TTI::SK_Select: case TTI::SK_Transpose: case TTI::SK_PermuteSingleSrc: case TTI::SK_PermuteTwoSrc: Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp +++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp @@ -674,29 +674,25 @@ return IdentityLHS || IdentityRHS; } -static bool isAlternateVectorMask(ArrayRef Mask) { - bool isAlternate = true; +static bool isSelectVectorMask(ArrayRef Mask) { + bool IsSelect = true; + bool FoundLHS = false; + bool FoundRHS = false; unsigned MaskSize = Mask.size(); - // Example: shufflevector A, B, <0,5,2,7> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); - } - - if (isAlternate) - return true; - - isAlternate = true; + // Example: shufflevector A, B, <0,1,6,3> // Example: shufflevector A, B, <4,1,6,3> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + for (unsigned i = 0; i < MaskSize && IsSelect; ++i) { if (Mask[i] < 0) continue; - isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + bool IsLHS = (Mask[i] == (int)i); + bool IsRHS = (Mask[i] == (int)(i + MaskSize)); + FoundLHS |= IsLHS; + FoundRHS |= IsRHS; + IsSelect = IsLHS || IsRHS; } - - return isAlternate; + // If we don't use both vectors this is really an Identity mask. + return IsSelect && FoundLHS && FoundRHS; } static bool isTransposeVectorMask(ArrayRef Mask) { @@ -1236,8 +1232,8 @@ return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, nullptr); - if (isAlternateVectorMask(Mask)) - return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Alternate, + if (isSelectVectorMask(Mask)) + return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Select, VecTypOp0, 0, nullptr); if (isTransposeVectorMask(Mask)) Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -400,8 +400,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - // We only handle costs of reverse and alternate shuffles for now. - if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) + // We only handle costs of reverse and select shuffles for now. + if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { @@ -426,9 +426,9 @@ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } - if (Kind == TTI::SK_Alternate) { - static const CostTblEntry NEONAltShuffleTbl[] = { - // Alt shuffle cost table for ARM. Cost is the number of instructions + if (Kind == TTI::SK_Select) { + static const CostTblEntry NEONSelShuffleTbl[] = { + // Select shuffle cost table for ARM. Cost is the number of instructions // required to create the shuffled vector. {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, @@ -445,7 +445,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl, + if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp @@ -912,8 +912,8 @@ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb - { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw - { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb + { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb + { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps @@ -977,12 +977,12 @@ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb // + vinsertf128 - { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor - { TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor + { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor { TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd { TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd @@ -1008,12 +1008,12 @@ return LT.first * Entry->Cost; static const CostTblEntry SSE41ShuffleTbl[] = { - { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps - { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb + { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Select, MVT::v2f64, 1 }, // movsd + { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Select, MVT::v4f32, 1 }, // blendps + { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) @@ -1027,8 +1027,8 @@ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - { TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por - { TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por + { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por + { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb @@ -1055,11 +1055,11 @@ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus - { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps - { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por - { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por + { TTI::SK_Select, MVT::v2i64, 1 }, // movsd + { TTI::SK_Select, MVT::v2f64, 1 }, // movsd + { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd @@ -1083,7 +1083,7 @@ static const CostTblEntry SSE1ShuffleTbl[] = { { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps - { TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps + { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps }; @@ -1941,8 +1941,8 @@ if (VT.isSimple() && LT.second != VT.getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. - Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) + - getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr); + Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) + + getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr); else if (LT.second.getVectorNumElements() > NumElem) { VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -313,7 +313,7 @@ if ((CommonShuffleMode == FirstAlternate || CommonShuffleMode == SecondAlternate) && Vec2) - return TargetTransformInfo::SK_Alternate; + return TargetTransformInfo::SK_Select; // If Vec2 was never used, we have a permutation of a single vector, otherwise // we have permutation of 2 vectors. return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc @@ -2461,8 +2461,7 @@ Instruction *I1 = cast(VL[1]); VecCost += TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); - VecCost += - TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; } default: Index: llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll =================================================================== --- llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -200,12 +200,24 @@ } define <4 x i32> @test_v4i32_3(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: 'test_v4i32_3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; SSE2-LABEL: 'test_v4i32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; SSSE3-LABEL: 'test_v4i32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; SSE42-LABEL: 'test_v4i32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; AVX-LABEL: 'test_v4i32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 ; ; BTVER2-LABEL: 'test_v4i32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 ; %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -263,12 +275,24 @@ } define <4 x float> @test_v4f32_3(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: 'test_v4f32_3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; SSE2-LABEL: 'test_v4f32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; SSSE3-LABEL: 'test_v4f32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; SSE42-LABEL: 'test_v4f32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; AVX-LABEL: 'test_v4f32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 ; ; BTVER2-LABEL: 'test_v4f32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 ; %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -311,27 +335,15 @@ define <4 x i64> @test_v4i64_3(<4 x i64> %a, <4 x i64> %b) { ; SSE-LABEL: 'test_v4i64_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; -; AVX1-LABEL: 'test_v4i64_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; AVX2-LABEL: 'test_v4i64_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; XOPAVX1-LABEL: 'test_v4i64_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; XOPAVX2-LABEL: 'test_v4i64_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 +; AVX-LABEL: 'test_v4i64_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; ; BTVER2-LABEL: 'test_v4i64_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> @@ -374,27 +386,15 @@ define <4 x double> @test_v4f64_3(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: 'test_v4f64_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; -; AVX1-LABEL: 'test_v4f64_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; AVX2-LABEL: 'test_v4f64_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; XOPAVX1-LABEL: 'test_v4f64_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; XOPAVX2-LABEL: 'test_v4f64_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 +; AVX-LABEL: 'test_v4f64_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; ; BTVER2-LABEL: 'test_v4f64_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> @@ -455,7 +455,7 @@ define <8 x i16> @test_v8i16_3(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: 'test_v8i16_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; SSSE3-LABEL: 'test_v8i16_3' @@ -463,27 +463,15 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; SSE42-LABEL: 'test_v8i16_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; -; AVX1-LABEL: 'test_v8i16_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; AVX2-LABEL: 'test_v8i16_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; XOPAVX1-LABEL: 'test_v8i16_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; XOPAVX2-LABEL: 'test_v8i16_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 +; AVX-LABEL: 'test_v8i16_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; BTVER2-LABEL: 'test_v8i16_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -541,28 +529,24 @@ } define <8 x i32> @test_v8i32_3(<8 x i32> %a, <8 x i32> %b) { -; SSE-LABEL: 'test_v8i32_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; AVX1-LABEL: 'test_v8i32_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; AVX2-LABEL: 'test_v8i32_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; XOPAVX1-LABEL: 'test_v8i32_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; XOPAVX2-LABEL: 'test_v8i32_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; SSE2-LABEL: 'test_v8i32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; SSSE3-LABEL: 'test_v8i32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; SSE42-LABEL: 'test_v8i32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; AVX-LABEL: 'test_v8i32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 ; ; BTVER2-LABEL: 'test_v8i32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 ; %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -620,28 +604,24 @@ } define <8 x float> @test_v8f32_3(<8 x float> %a, <8 x float> %b) { -; SSE-LABEL: 'test_v8f32_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; AVX1-LABEL: 'test_v8f32_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; AVX2-LABEL: 'test_v8f32_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; XOPAVX1-LABEL: 'test_v8f32_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; XOPAVX2-LABEL: 'test_v8f32_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; SSE2-LABEL: 'test_v8f32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; SSSE3-LABEL: 'test_v8f32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; SSE42-LABEL: 'test_v8f32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; AVX-LABEL: 'test_v8f32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 ; ; BTVER2-LABEL: 'test_v8f32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 ; %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> @@ -702,7 +682,7 @@ define <16 x i8> @test_v16i8_3(<16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: 'test_v16i8_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; SSSE3-LABEL: 'test_v16i8_3' @@ -710,27 +690,15 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; SSE42-LABEL: 'test_v16i8_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; -; AVX1-LABEL: 'test_v16i8_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; AVX2-LABEL: 'test_v16i8_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; XOPAVX1-LABEL: 'test_v16i8_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; XOPAVX2-LABEL: 'test_v16i8_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 +; AVX-LABEL: 'test_v16i8_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; BTVER2-LABEL: 'test_v16i8_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -813,35 +781,35 @@ define <16 x i16> @test_v16i16_3(<16 x i16> %a, <16 x i16> %b) { ; SSE2-LABEL: 'test_v16i16_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; SSSE3-LABEL: 'test_v16i16_3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; SSE42-LABEL: 'test_v16i16_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; AVX1-LABEL: 'test_v16i16_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; AVX2-LABEL: 'test_v16i16_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; XOPAVX1-LABEL: 'test_v16i16_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; XOPAVX2-LABEL: 'test_v16i16_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; BTVER2-LABEL: 'test_v16i16_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -924,35 +892,35 @@ define <32 x i8> @test_v32i8_3(<32 x i8> %a, <32 x i8> %b) { ; SSE2-LABEL: 'test_v32i8_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; SSSE3-LABEL: 'test_v32i8_3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; SSE42-LABEL: 'test_v32i8_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; AVX1-LABEL: 'test_v32i8_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; AVX2-LABEL: 'test_v32i8_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; XOPAVX1-LABEL: 'test_v32i8_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; XOPAVX2-LABEL: 'test_v32i8_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; BTVER2-LABEL: 'test_v32i8_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32>