Index: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
@@ -641,7 +641,9 @@
   enum ShuffleKind {
     SK_Broadcast,       ///< Broadcast element 0 to all other elements.
     SK_Reverse,         ///< Reverse the order of the vector.
-    SK_Alternate,       ///< Choose alternate elements from vector.
+    SK_Select,          ///< Selects elements from the corresponding lane of
+                        ///< either source operand. This is equivalent to a
+                        ///< vector select with a constant condition operand.
     SK_Transpose,       ///< Transpose two vectors.
     SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
     SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
Index: llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/trunk/include/llvm/CodeGen/BasicTTIImpl.h
@@ -554,7 +554,7 @@
   unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                           Type *SubTp) {
     switch (Kind) {
-    case TTI::SK_Alternate:
+    case TTI::SK_Select:
     case TTI::SK_Transpose:
     case TTI::SK_PermuteSingleSrc:
     case TTI::SK_PermuteTwoSrc:
Index: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
@@ -674,29 +674,25 @@
   return IdentityLHS || IdentityRHS;
 }
 
-static bool isAlternateVectorMask(ArrayRef<int> Mask) {
-  bool isAlternate = true;
+static bool isSelectVectorMask(ArrayRef<int> Mask) {
+  bool IsSelect = true;
+  bool FoundLHS = false;
+  bool FoundRHS = false;
   unsigned MaskSize = Mask.size();
 
-  // Example: shufflevector A, B, <0,5,2,7>
-  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
-    if (Mask[i] < 0)
-      continue;
-    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
-  }
-
-  if (isAlternate)
-    return true;
-
-  isAlternate = true;
+  // Example: shufflevector A, B, <0,1,6,3>
   // Example: shufflevector A, B, <4,1,6,3>
-  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+  for (unsigned i = 0; i < MaskSize && IsSelect; ++i) {
     if (Mask[i] < 0)
       continue;
-    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+    bool IsLHS = (Mask[i] == (int)i);
+    bool IsRHS = (Mask[i] == (int)(i + MaskSize));
+    FoundLHS |= IsLHS;
+    FoundRHS |= IsRHS;
+    IsSelect = IsLHS || IsRHS;
   }
-
-  return isAlternate;
+  // If we don't use both vectors this is really an Identity mask.
+  return IsSelect && FoundLHS && FoundRHS;
 }
 
 static bool isTransposeVectorMask(ArrayRef<int> Mask) {
@@ -1236,8 +1232,8 @@
         return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Reverse,
                                        VecTypOp0, 0, nullptr);
 
-      if (isAlternateVectorMask(Mask))
-        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Alternate,
+      if (isSelectVectorMask(Mask))
+        return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Select,
                                        VecTypOp0, 0, nullptr);
 
       if (isTransposeVectorMask(Mask))
Index: llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ llvm/trunk/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -400,8 +400,8 @@
 
 int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                Type *SubTp) {
-  // We only handle costs of reverse and alternate shuffles for now.
-  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
+  // We only handle costs of reverse and select shuffles for now.
+  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
 
   if (Kind == TTI::SK_Reverse) {
@@ -426,9 +426,9 @@
 
     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
   }
-  if (Kind == TTI::SK_Alternate) {
-    static const CostTblEntry NEONAltShuffleTbl[] = {
-        // Alt shuffle cost table for ARM. Cost is the number of instructions
+  if (Kind == TTI::SK_Select) {
+    static const CostTblEntry NEONSelShuffleTbl[] = {
+        // Select shuffle cost table for ARM. Cost is the number of instructions
         // required to create the shuffled vector.
 
         {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
@@ -445,7 +445,7 @@
         {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
 
     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
-    if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
+    if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                             ISD::VECTOR_SHUFFLE, LT.second))
       return LT.first * Entry->Cost;
     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
Index: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -912,8 +912,8 @@
     { TTI::SK_Reverse,   MVT::v16i16, 2 }, // vperm2i128 + pshufb
     { TTI::SK_Reverse,   MVT::v32i8,  2 }, // vperm2i128 + pshufb
 
-    { TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
-    { TTI::SK_Alternate, MVT::v32i8,  1 }, // vpblendvb
+    { TTI::SK_Select,    MVT::v16i16, 1 }, // vpblendvb
+    { TTI::SK_Select,    MVT::v32i8,  1 }, // vpblendvb
 
     { TTI::SK_PermuteSingleSrc, MVT::v4f64,  1 }, // vpermpd
     { TTI::SK_PermuteSingleSrc, MVT::v8f32,  1 }, // vpermps
@@ -977,12 +977,12 @@
     { TTI::SK_Reverse,   MVT::v32i8,  4 }, // vextractf128 + 2*pshufb
                                            // + vinsertf128
 
-    { TTI::SK_Alternate, MVT::v4i64,  1 }, // vblendpd
-    { TTI::SK_Alternate, MVT::v4f64,  1 }, // vblendpd
-    { TTI::SK_Alternate, MVT::v8i32,  1 }, // vblendps
-    { TTI::SK_Alternate, MVT::v8f32,  1 }, // vblendps
-    { TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
-    { TTI::SK_Alternate, MVT::v32i8,  3 }, // vpand + vpandn + vpor
+    { TTI::SK_Select,    MVT::v4i64,  1 }, // vblendpd
+    { TTI::SK_Select,    MVT::v4f64,  1 }, // vblendpd
+    { TTI::SK_Select,    MVT::v8i32,  1 }, // vblendps
+    { TTI::SK_Select,    MVT::v8f32,  1 }, // vblendps
+    { TTI::SK_Select,    MVT::v16i16, 3 }, // vpand + vpandn + vpor
+    { TTI::SK_Select,    MVT::v32i8,  3 }, // vpand + vpandn + vpor
 
     { TTI::SK_PermuteSingleSrc, MVT::v4f64,  3 }, // 2*vperm2f128 + vshufpd
     { TTI::SK_PermuteSingleSrc, MVT::v4i64,  3 }, // 2*vperm2f128 + vshufpd
@@ -1008,12 +1008,12 @@
       return LT.first * Entry->Cost;
 
   static const CostTblEntry SSE41ShuffleTbl[] = {
-    { TTI::SK_Alternate, MVT::v2i64,  1 }, // pblendw
-    { TTI::SK_Alternate, MVT::v2f64,  1 }, // movsd
-    { TTI::SK_Alternate, MVT::v4i32,  1 }, // pblendw
-    { TTI::SK_Alternate, MVT::v4f32,  1 }, // blendps
-    { TTI::SK_Alternate, MVT::v8i16,  1 }, // pblendw
-    { TTI::SK_Alternate, MVT::v16i8,  1 }  // pblendvb
+    { TTI::SK_Select,    MVT::v2i64,  1 }, // pblendw
+    { TTI::SK_Select,    MVT::v2f64,  1 }, // movsd
+    { TTI::SK_Select,    MVT::v4i32,  1 }, // pblendw
+    { TTI::SK_Select,    MVT::v4f32,  1 }, // blendps
+    { TTI::SK_Select,    MVT::v8i16,  1 }, // pblendw
+    { TTI::SK_Select,    MVT::v16i8,  1 }  // pblendvb
   };
 
   if (ST->hasSSE41())
@@ -1027,8 +1027,8 @@
     { TTI::SK_Reverse,   MVT::v8i16,  1 }, // pshufb
     { TTI::SK_Reverse,   MVT::v16i8,  1 }, // pshufb
 
-    { TTI::SK_Alternate, MVT::v8i16,  3 }, // 2*pshufb + por
-    { TTI::SK_Alternate, MVT::v16i8,  3 }, // 2*pshufb + por
+    { TTI::SK_Select,    MVT::v8i16,  3 }, // 2*pshufb + por
+    { TTI::SK_Select,    MVT::v16i8,  3 }, // 2*pshufb + por
 
     { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
     { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
@@ -1055,11 +1055,11 @@
     { TTI::SK_Reverse,   MVT::v16i8,  9 }, // 2*pshuflw + 2*pshufhw
                                            // + 2*pshufd + 2*unpck + packus
 
-    { TTI::SK_Alternate, MVT::v2i64,  1 }, // movsd
-    { TTI::SK_Alternate, MVT::v2f64,  1 }, // movsd
-    { TTI::SK_Alternate, MVT::v4i32,  2 }, // 2*shufps
-    { TTI::SK_Alternate, MVT::v8i16,  3 }, // pand + pandn + por
-    { TTI::SK_Alternate, MVT::v16i8,  3 }, // pand + pandn + por
+    { TTI::SK_Select,    MVT::v2i64,  1 }, // movsd
+    { TTI::SK_Select,    MVT::v2f64,  1 }, // movsd
+    { TTI::SK_Select,    MVT::v4i32,  2 }, // 2*shufps
+    { TTI::SK_Select,    MVT::v8i16,  3 }, // pand + pandn + por
+    { TTI::SK_Select,    MVT::v16i8,  3 }, // pand + pandn + por
 
     { TTI::SK_PermuteSingleSrc, MVT::v2f64,  1 }, // shufpd
     { TTI::SK_PermuteSingleSrc, MVT::v2i64,  1 }, // pshufd
@@ -1083,7 +1083,7 @@
   static const CostTblEntry SSE1ShuffleTbl[] = {
     { TTI::SK_Broadcast,        MVT::v4f32, 1 }, // shufps
     { TTI::SK_Reverse,          MVT::v4f32, 1 }, // shufps
-    { TTI::SK_Alternate,        MVT::v4f32, 2 }, // 2*shufps
+    { TTI::SK_Select,           MVT::v4f32, 2 }, // 2*shufps
     { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
     { TTI::SK_PermuteTwoSrc,    MVT::v4f32, 2 }, // 2*shufps
   };
@@ -1941,8 +1941,8 @@
   if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
       LT.second.getVectorNumElements() == NumElem)
     // Promotion requires expand/truncate for data and a shuffle for mask.
-    Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) +
-            getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr);
+    Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) +
+            getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr);
 
   else if (LT.second.getVectorNumElements() > NumElem) {
     VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -313,7 +313,7 @@
   if ((CommonShuffleMode == FirstAlternate ||
        CommonShuffleMode == SecondAlternate) &&
       Vec2)
-    return TargetTransformInfo::SK_Alternate;
+    return TargetTransformInfo::SK_Select;
   // If Vec2 was never used, we have a permutation of a single vector, otherwise
   // we have permutation of 2 vectors.
   return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
@@ -2461,8 +2461,7 @@
       Instruction *I1 = cast<Instruction>(VL[1]);
       VecCost +=
           TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
-      VecCost +=
-          TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
+      VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
     default:
Index: llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
===================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
+++ llvm/trunk/test/Analysis/CostModel/X86/alternate-shuffle-cost.ll
@@ -200,12 +200,24 @@
 }
 
 define <4 x i32> @test_v4i32_3(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: 'test_v4i32_3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+; SSE2-LABEL: 'test_v4i32_3'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+;
+; SSSE3-LABEL: 'test_v4i32_3'
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+;
+; SSE42-LABEL: 'test_v4i32_3'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
+;
+; AVX-LABEL: 'test_v4i32_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
 ;
 ; BTVER2-LABEL: 'test_v4i32_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
 ;
   %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@@ -263,12 +275,24 @@
 }
 
 define <4 x float> @test_v4f32_3(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: 'test_v4f32_3'
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
+; SSE2-LABEL: 'test_v4f32_3'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
+;
+; SSSE3-LABEL: 'test_v4f32_3'
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
+;
+; SSE42-LABEL: 'test_v4f32_3'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
+;
+; AVX-LABEL: 'test_v4f32_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
 ;
 ; BTVER2-LABEL: 'test_v4f32_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
 ;
   %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@@ -311,27 +335,15 @@
 
 define <4 x i64> @test_v4i64_3(<4 x i64> %a, <4 x i64> %b) {
 ; SSE-LABEL: 'test_v4i64_3'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
 ;
-; AVX1-LABEL: 'test_v4i64_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
-;
-; AVX2-LABEL: 'test_v4i64_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
-;
-; XOPAVX1-LABEL: 'test_v4i64_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
-;
-; XOPAVX2-LABEL: 'test_v4i64_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
+; AVX-LABEL: 'test_v4i64_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
 ;
 ; BTVER2-LABEL: 'test_v4i64_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
 ;
   %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
@@ -374,27 +386,15 @@
 
 define <4 x double> @test_v4f64_3(<4 x double> %a, <4 x double> %b) {
 ; SSE-LABEL: 'test_v4f64_3'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+; SSE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
 ; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
 ;
-; AVX1-LABEL: 'test_v4f64_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
-;
-; AVX2-LABEL: 'test_v4f64_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
-;
-; XOPAVX1-LABEL: 'test_v4f64_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
-;
-; XOPAVX2-LABEL: 'test_v4f64_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
+; AVX-LABEL: 'test_v4f64_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
 ;
 ; BTVER2-LABEL: 'test_v4f64_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
 ;
   %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
@@ -455,7 +455,7 @@
 
 define <8 x i16> @test_v8i16_3(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-LABEL: 'test_v8i16_3'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
 ;
 ; SSSE3-LABEL: 'test_v8i16_3'
@@ -463,27 +463,15 @@
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
 ;
 ; SSE42-LABEL: 'test_v8i16_3'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
 ;
-; AVX1-LABEL: 'test_v8i16_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
-;
-; AVX2-LABEL: 'test_v8i16_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
-;
-; XOPAVX1-LABEL: 'test_v8i16_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
-;
-; XOPAVX2-LABEL: 'test_v8i16_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
+; AVX-LABEL: 'test_v8i16_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
 ;
 ; BTVER2-LABEL: 'test_v8i16_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
 ;
   %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@@ -541,28 +529,24 @@
 }
 
 define <8 x i32> @test_v8i32_3(<8 x i32> %a, <8 x i32> %b) {
-; SSE-LABEL: 'test_v8i32_3'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
-;
-; AVX1-LABEL: 'test_v8i32_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
-;
-; AVX2-LABEL: 'test_v8i32_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
-;
-; XOPAVX1-LABEL: 'test_v8i32_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
-;
-; XOPAVX2-LABEL: 'test_v8i32_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+; SSE2-LABEL: 'test_v8i32_3'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+;
+; SSSE3-LABEL: 'test_v8i32_3'
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+;
+; SSE42-LABEL: 'test_v8i32_3'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
+;
+; AVX-LABEL: 'test_v8i32_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
 ;
 ; BTVER2-LABEL: 'test_v8i32_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
 ;
   %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@@ -620,28 +604,24 @@
 }
 
 define <8 x float> @test_v8f32_3(<8 x float> %a, <8 x float> %b) {
-; SSE-LABEL: 'test_v8f32_3'
-; SSE-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; SSE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
-;
-; AVX1-LABEL: 'test_v8f32_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
-;
-; AVX2-LABEL: 'test_v8f32_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
-;
-; XOPAVX1-LABEL: 'test_v8f32_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
-;
-; XOPAVX2-LABEL: 'test_v8f32_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
+; SSE2-LABEL: 'test_v8f32_3'
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
+;
+; SSSE3-LABEL: 'test_v8f32_3'
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
+;
+; SSE42-LABEL: 'test_v8f32_3'
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
+;
+; AVX-LABEL: 'test_v8f32_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
 ;
 ; BTVER2-LABEL: 'test_v8f32_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
 ;
   %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@@ -702,7 +682,7 @@
 
 define <16 x i8> @test_v16i8_3(<16 x i8> %a, <16 x i8> %b) {
 ; SSE2-LABEL: 'test_v16i8_3'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
 ;
 ; SSSE3-LABEL: 'test_v16i8_3'
@@ -710,27 +690,15 @@
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
 ;
 ; SSE42-LABEL: 'test_v16i8_3'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
 ;
-; AVX1-LABEL: 'test_v16i8_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
-;
-; AVX2-LABEL: 'test_v16i8_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
-;
-; XOPAVX1-LABEL: 'test_v16i8_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
-;
-; XOPAVX2-LABEL: 'test_v16i8_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
+; AVX-LABEL: 'test_v16i8_3'
+; AVX-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; AVX-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
 ;
 ; BTVER2-LABEL: 'test_v16i8_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
 ;
   %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
@@ -813,35 +781,35 @@
 
 define <16 x i16> @test_v16i16_3(<16 x i16> %a, <16 x i16> %b) {
 ; SSE2-LABEL: 'test_v16i16_3'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; SSSE3-LABEL: 'test_v16i16_3'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; SSE42-LABEL: 'test_v16i16_3'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; AVX1-LABEL: 'test_v16i16_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; AVX2-LABEL: 'test_v16i16_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; XOPAVX1-LABEL: 'test_v16i16_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; XOPAVX2-LABEL: 'test_v16i16_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
 ; BTVER2-LABEL: 'test_v16i16_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
 ;
   %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
@@ -924,35 +892,35 @@
 
 define <32 x i8> @test_v32i8_3(<32 x i8> %a, <32 x i8> %b) {
 ; SSE2-LABEL: 'test_v32i8_3'
-; SSE2-NEXT:  Cost Model: Found an estimated cost of 78 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; SSE2-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; SSE2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; SSSE3-LABEL: 'test_v32i8_3'
-; SSSE3-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; SSSE3-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; SSSE3-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; SSE42-LABEL: 'test_v32i8_3'
-; SSE42-NEXT:  Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; SSE42-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; SSE42-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; AVX1-LABEL: 'test_v32i8_3'
-; AVX1-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; AVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; AVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; AVX2-LABEL: 'test_v32i8_3'
-; AVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; AVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; AVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; XOPAVX1-LABEL: 'test_v32i8_3'
-; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; XOPAVX1-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; XOPAVX2-LABEL: 'test_v32i8_3'
-; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; XOPAVX2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
 ; BTVER2-LABEL: 'test_v32i8_3'
-; BTVER2-NEXT:  Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
+; BTVER2-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
 ; BTVER2-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
 ;
   %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>