Index: include/llvm/Analysis/TargetTransformInfo.h =================================================================== --- include/llvm/Analysis/TargetTransformInfo.h +++ include/llvm/Analysis/TargetTransformInfo.h @@ -641,7 +641,7 @@ enum ShuffleKind { SK_Broadcast, ///< Broadcast element 0 to all other elements. SK_Reverse, ///< Reverse the order of the vector. - SK_Alternate, ///< Choose alternate elements from vector. + SK_Select, ///< Select either vectors' element from that lane. SK_Transpose, ///< Transpose two vectors. SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset. Index: include/llvm/CodeGen/BasicTTIImpl.h =================================================================== --- include/llvm/CodeGen/BasicTTIImpl.h +++ include/llvm/CodeGen/BasicTTIImpl.h @@ -554,7 +554,7 @@ unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { switch (Kind) { - case TTI::SK_Alternate: + case TTI::SK_Select: case TTI::SK_Transpose: case TTI::SK_PermuteSingleSrc: case TTI::SK_PermuteTwoSrc: Index: lib/Analysis/TargetTransformInfo.cpp =================================================================== --- lib/Analysis/TargetTransformInfo.cpp +++ lib/Analysis/TargetTransformInfo.cpp @@ -658,29 +658,23 @@ return true; } -static bool isAlternateVectorMask(ArrayRef Mask) { - bool isAlternate = true; +static bool isSelectVectorMask(ArrayRef Mask) { + bool isSelect = true; + bool FoundLHS = false; + bool FoundRHS = false; unsigned MaskSize = Mask.size(); - // Example: shufflevector A, B, <0,5,2,7> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { - if (Mask[i] < 0) - continue; - isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); - } - - if (isAlternate) - return true; - - isAlternate = true; + // Example: shufflevector A, B, <0,1,6,3> // Example: shufflevector A, B, <4,1,6,3> - for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + for (unsigned i = 0; i < MaskSize && isSelect; ++i) { if (Mask[i] < 0) continue; - isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + FoundLHS |= (Mask[i] == i); + FoundRHS |= (Mask[i] == (i + MaskSize)); + isSelect = (Mask[i] == i) || (Mask[i] == (i + MaskSize)); } - - return isAlternate; + // If we don't use both vectors this is really an Identity mask. + return isSelect && FoundLHS && FoundRHS; } static bool isTransposeVectorMask(ArrayRef Mask) { @@ -1213,11 +1207,12 @@ SmallVector Mask = Shuffle->getShuffleMask(); if (NumVecElems == Mask.size()) { + // TODO - should we test for Identity shuffles (cost = 0)? if (isReverseVectorMask(Mask)) return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, nullptr); - if (isAlternateVectorMask(Mask)) - return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Alternate, + if (isSelectVectorMask(Mask)) + return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Select, VecTypOp0, 0, nullptr); if (isTransposeVectorMask(Mask)) Index: lib/Target/ARM/ARMTargetTransformInfo.cpp =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.cpp +++ lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -400,8 +400,8 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) { - // We only handle costs of reverse and alternate shuffles for now. - if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) + // We only handle costs of reverse and select shuffles for now. + if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select) return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); if (Kind == TTI::SK_Reverse) { @@ -426,9 +426,9 @@ return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); } - if (Kind == TTI::SK_Alternate) { - static const CostTblEntry NEONAltShuffleTbl[] = { - // Alt shuffle cost table for ARM. Cost is the number of instructions + if (Kind == TTI::SK_Select) { + static const CostTblEntry NEONSelShuffleTbl[] = { + // Select shuffle cost table for ARM. Cost is the number of instructions // required to create the shuffled vector. {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, @@ -445,7 +445,7 @@ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); - if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl, + if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) return LT.first * Entry->Cost; return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); Index: lib/Target/X86/X86TargetTransformInfo.cpp =================================================================== --- lib/Target/X86/X86TargetTransformInfo.cpp +++ lib/Target/X86/X86TargetTransformInfo.cpp @@ -912,8 +912,8 @@ { TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb { TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb - { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw - { TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb + { TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb + { TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb { TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd { TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps @@ -977,12 +977,12 @@ { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb // + vinsertf128 - { TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd - { TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps - { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor - { TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor + { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd + { TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd + { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps + { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps + { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor + { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor { TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd { TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd @@ -1008,12 +1008,12 @@ return LT.first * Entry->Cost; static const CostTblEntry SSE41ShuffleTbl[] = { - { TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps - { TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw - { TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb + { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw + { TTI::SK_Select, MVT::v2f64, 1 }, // movsd + { TTI::SK_Select, MVT::v4i32, 1 }, // pblendw + { TTI::SK_Select, MVT::v4f32, 1 }, // blendps + { TTI::SK_Select, MVT::v8i16, 1 }, // pblendw + { TTI::SK_Select, MVT::v16i8, 1 } // pblendvb }; if (ST->hasSSE41()) @@ -1027,8 +1027,8 @@ { TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb { TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb - { TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por - { TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por + { TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por + { TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb @@ -1055,11 +1055,11 @@ { TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw // + 2*pshufd + 2*unpck + packus - { TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd - { TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps - { TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por - { TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por + { TTI::SK_Select, MVT::v2i64, 1 }, // movsd + { TTI::SK_Select, MVT::v2f64, 1 }, // movsd + { TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps + { TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por + { TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd @@ -1083,7 +1083,7 @@ static const CostTblEntry SSE1ShuffleTbl[] = { { TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps { TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps - { TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps + { TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps }; @@ -1941,8 +1941,8 @@ if (VT.isSimple() && LT.second != VT.getSimpleVT() && LT.second.getVectorNumElements() == NumElem) // Promotion requires expand/truncate for data and a shuffle for mask. - Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) + - getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr); + Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) + + getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr); else if (LT.second.getVectorNumElements() > NumElem) { VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -313,7 +313,7 @@ if ((CommonShuffleMode == FirstAlternate || CommonShuffleMode == SecondAlternate) && Vec2) - return TargetTransformInfo::SK_Alternate; + return TargetTransformInfo::SK_Select; // If Vec2 was never used, we have a permutation of a single vector, otherwise // we have permutation of 2 vectors. return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc @@ -2462,7 +2462,7 @@ VecCost += TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); VecCost += - TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); + TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0); return ReuseShuffleCost + VecCost - ScalarCost; } default: Index: test/Analysis/CostModel/X86/alternate-shuffle-cost.ll =================================================================== --- test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -200,12 +200,24 @@ } define <4 x i32> @test_v4i32_3(<4 x i32> %a, <4 x i32> %b) { -; CHECK-LABEL: 'test_v4i32_3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; SSE2-LABEL: 'test_v4i32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; SSSE3-LABEL: 'test_v4i32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; SSE42-LABEL: 'test_v4i32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 +; +; AVX-LABEL: 'test_v4i32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 ; ; BTVER2-LABEL: 'test_v4i32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1 ; %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -263,12 +275,24 @@ } define <4 x float> @test_v4f32_3(<4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: 'test_v4f32_3' -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; SSE2-LABEL: 'test_v4f32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; SSSE3-LABEL: 'test_v4f32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; SSE42-LABEL: 'test_v4f32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 +; +; AVX-LABEL: 'test_v4f32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 ; ; BTVER2-LABEL: 'test_v4f32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1 ; %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -311,27 +335,15 @@ define <4 x i64> @test_v4i64_3(<4 x i64> %a, <4 x i64> %b) { ; SSE-LABEL: 'test_v4i64_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; -; AVX1-LABEL: 'test_v4i64_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; AVX2-LABEL: 'test_v4i64_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; XOPAVX1-LABEL: 'test_v4i64_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 -; -; XOPAVX2-LABEL: 'test_v4i64_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 +; AVX-LABEL: 'test_v4i64_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; ; BTVER2-LABEL: 'test_v4i64_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1 ; %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> @@ -374,27 +386,15 @@ define <4 x double> @test_v4f64_3(<4 x double> %a, <4 x double> %b) { ; SSE-LABEL: 'test_v4f64_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; -; AVX1-LABEL: 'test_v4f64_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; AVX2-LABEL: 'test_v4f64_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; XOPAVX1-LABEL: 'test_v4f64_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 -; -; XOPAVX2-LABEL: 'test_v4f64_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 +; AVX-LABEL: 'test_v4f64_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; ; BTVER2-LABEL: 'test_v4f64_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1 ; %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> @@ -455,7 +455,7 @@ define <8 x i16> @test_v8i16_3(<8 x i16> %a, <8 x i16> %b) { ; SSE2-LABEL: 'test_v8i16_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; SSSE3-LABEL: 'test_v8i16_3' @@ -463,27 +463,15 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; SSE42-LABEL: 'test_v8i16_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; -; AVX1-LABEL: 'test_v8i16_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; AVX2-LABEL: 'test_v8i16_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; XOPAVX1-LABEL: 'test_v8i16_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 -; -; XOPAVX2-LABEL: 'test_v8i16_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 +; AVX-LABEL: 'test_v8i16_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; ; BTVER2-LABEL: 'test_v8i16_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1 ; %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -541,28 +529,24 @@ } define <8 x i32> @test_v8i32_3(<8 x i32> %a, <8 x i32> %b) { -; SSE-LABEL: 'test_v8i32_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; AVX1-LABEL: 'test_v8i32_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; AVX2-LABEL: 'test_v8i32_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; XOPAVX1-LABEL: 'test_v8i32_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 -; -; XOPAVX2-LABEL: 'test_v8i32_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; SSE2-LABEL: 'test_v8i32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; SSSE3-LABEL: 'test_v8i32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; SSE42-LABEL: 'test_v8i32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 +; +; AVX-LABEL: 'test_v8i32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 ; ; BTVER2-LABEL: 'test_v8i32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1 ; %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -620,28 +604,24 @@ } define <8 x float> @test_v8f32_3(<8 x float> %a, <8 x float> %b) { -; SSE-LABEL: 'test_v8f32_3' -; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; AVX1-LABEL: 'test_v8f32_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; AVX2-LABEL: 'test_v8f32_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; XOPAVX1-LABEL: 'test_v8f32_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 -; -; XOPAVX2-LABEL: 'test_v8f32_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; SSE2-LABEL: 'test_v8f32_3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; SSSE3-LABEL: 'test_v8f32_3' +; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; SSE42-LABEL: 'test_v8f32_3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 +; +; AVX-LABEL: 'test_v8f32_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 ; ; BTVER2-LABEL: 'test_v8f32_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1 ; %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> @@ -702,7 +682,7 @@ define <16 x i8> @test_v16i8_3(<16 x i8> %a, <16 x i8> %b) { ; SSE2-LABEL: 'test_v16i8_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; SSSE3-LABEL: 'test_v16i8_3' @@ -710,27 +690,15 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; SSE42-LABEL: 'test_v16i8_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; -; AVX1-LABEL: 'test_v16i8_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; AVX2-LABEL: 'test_v16i8_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; XOPAVX1-LABEL: 'test_v16i8_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 -; -; XOPAVX2-LABEL: 'test_v16i8_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 +; AVX-LABEL: 'test_v16i8_3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; BTVER2-LABEL: 'test_v16i8_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -813,35 +781,35 @@ define <16 x i16> @test_v16i16_3(<16 x i16> %a, <16 x i16> %b) { ; SSE2-LABEL: 'test_v16i16_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; SSSE3-LABEL: 'test_v16i16_3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; SSE42-LABEL: 'test_v16i16_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; AVX1-LABEL: 'test_v16i16_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; AVX2-LABEL: 'test_v16i16_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; XOPAVX1-LABEL: 'test_v16i16_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; XOPAVX2-LABEL: 'test_v16i16_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; ; BTVER2-LABEL: 'test_v16i16_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1 ; %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> @@ -924,35 +892,35 @@ define <32 x i8> @test_v32i8_3(<32 x i8> %a, <32 x i8> %b) { ; SSE2-LABEL: 'test_v32i8_3' -; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; SSSE3-LABEL: 'test_v32i8_3' -; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; SSE42-LABEL: 'test_v32i8_3' -; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; AVX1-LABEL: 'test_v32i8_3' -; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; AVX2-LABEL: 'test_v32i8_3' -; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; XOPAVX1-LABEL: 'test_v32i8_3' -; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; XOPAVX2-LABEL: 'test_v32i8_3' -; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; ; BTVER2-LABEL: 'test_v32i8_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1 ; %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> Index: test/Analysis/CostModel/X86/reduction.ll =================================================================== --- test/Analysis/CostModel/X86/reduction.ll +++ test/Analysis/CostModel/X86/reduction.ll @@ -124,7 +124,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 @@ -135,7 +135,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 @@ -185,7 +185,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 @@ -196,7 +196,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.1, %rdx.shuf.0.0 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx.1 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx.1, i32 0 @@ -762,7 +762,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 @@ -772,7 +772,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x float> %bin.rdx8, i32 0 @@ -814,7 +814,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 @@ -824,7 +824,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 @@ -834,7 +834,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 @@ -844,7 +844,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.0 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.1 = shufflevector <4 x double> %rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = fadd <4 x double> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.1 = shufflevector <4 x double> %bin.rdx, <4 x double> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <4 x double> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x double> %bin.rdx8, i32 0 @@ -879,7 +879,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 @@ -892,7 +892,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 @@ -905,7 +905,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 @@ -918,7 +918,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x float> %bin.rdx, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = fadd <8 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.2.0 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x float> %bin.rdx8, <8 x float> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = fadd <8 x float> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r = extractelement <8 x float> %bin.rdx9, i32 0 @@ -993,7 +993,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 @@ -1003,7 +1003,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i32> %rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx = add <4 x i32> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <4 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r = extractelement <4 x i32> %bin.rdx8, i32 0 @@ -1045,7 +1045,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 @@ -1055,7 +1055,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 @@ -1065,7 +1065,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 @@ -1075,7 +1075,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.0 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.0.1 = shufflevector <4 x i64> %rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx = add <4 x i64> %rdx.shuf.0.0, %rdx.shuf.0.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.1.0 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.1.1 = shufflevector <4 x i64> %bin.rdx, <4 x i64> undef, <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <4 x i64> %rdx.shuf.1.0, %rdx.shuf.1.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <4 x i64> %bin.rdx8, i32 0 @@ -1110,7 +1110,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 @@ -1123,7 +1123,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i16> %bin.rdx, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx8 = add <8 x i16> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i16> %bin.rdx8, <8 x i16> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bin.rdx9 = add <8 x i16> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %r = extractelement <8 x i16> %bin.rdx9, i32 0 @@ -1177,7 +1177,7 @@ ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 @@ -1190,7 +1190,7 @@ ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSSE3-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSSE3-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 @@ -1203,7 +1203,7 @@ ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0 @@ -1216,7 +1216,7 @@ ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.0 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.1.1 = shufflevector <8 x i32> %bin.rdx, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx8 = add <8 x i32> %rdx.shuf.1.0, %rdx.shuf.1.1 -; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %rdx.shuf.2.0 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %rdx.shuf.2.1 = shufflevector <8 x i32> %bin.rdx8, <8 x i32> undef, <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bin.rdx9 = add <8 x i32> %rdx.shuf.2.0, %rdx.shuf.2.1 ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r = extractelement <8 x i32> %bin.rdx9, i32 0