diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6842,6 +6842,37 @@
   return Cost;
 }
 
+/// Checks if the buildvector last insertelement, with \p I as a part of this
+/// buildvector, dominates all other uses of \p Root.
+static bool dominatesAllUses(InsertElementInst *I, InsertElementInst *Root) {
+  if (I->getParent() != Root->getParent())
+    return false;
+  // Find top insertelement of the buildvector.
+  Value *V = I;
+  InsertElementInst *Top = I;
+  while (auto *IE = dyn_cast<InsertElementInst>(V)) {
+    if (!IE->hasOneUse())
+      break;
+    Top = IE;
+    V = IE->user_back();
+  }
+  if (Top->getParent() != Root->getParent())
+    return false;
+  BasicBlock *BB = Top->getParent();
+  unsigned Cnt = 0;
+  for (const User *U : Root->users()) {
+    auto *UInst = dyn_cast<Instruction>(U);
+    if (!UInst || UInst->getParent() != BB)
+      continue;
+    if (UInst->comesBefore(Top) || UInst == Top) {
+      ++Cnt;
+      if (Cnt > 1)
+        return false;
+    }
+  }
+  return true;
+}
+
 /// Check if two insertelement instructions are from the same buildvector.
 static bool areTwoInsertFromSameBuildVector(InsertElementInst *VU,
                                             InsertElementInst *V) {
@@ -6854,6 +6885,10 @@
   // Multiple used inserts are separate nodes.
   if (!VU->hasOneUse() && !V->hasOneUse())
     return false;
+  auto &&IsAllowedToBeInBuildVector = [](InsertElementInst *V,
+                                         InsertElementInst *TopI) {
+    return V->hasOneUse() || dominatesAllUses(TopI, V);
+  };
   auto *IE1 = VU;
   auto *IE2 = V;
   unsigned Idx1 = *getInsertIndex(IE1);
@@ -6863,9 +6898,9 @@
   // IE1.
   do {
     if (IE2 == VU)
-      return VU->hasOneUse();
+      return IsAllowedToBeInBuildVector(VU, V);
     if (IE1 == V)
-      return V->hasOneUse();
+      return IsAllowedToBeInBuildVector(V, VU);
     if (IE1) {
       if ((IE1 != VU && !IE1->hasOneUse()) ||
           getInsertIndex(IE1).value_or(Idx2) == Idx2)
@@ -6915,7 +6950,8 @@
 
 namespace {
 /// Returns incoming Value *, if the requested type is Value * too, or a default
-/// value, otherwise.
+/// value, otherwise. Also, if the requested type is Value * and the incoming
+/// value is not Value *, returns nullptr.
 struct ValueSelect {
   template <typename U>
   static typename std::enable_if<std::is_same<Value *, U>::value, Value *>::type
@@ -6923,6 +6959,11 @@
     return V;
   }
   template <typename U>
+  static typename std::enable_if<!std::is_same<Value *, U>::value, Value *>::type
+  get(U) {
+    return nullptr;
+  }
+  template <typename U>
   static typename std::enable_if<!std::is_same<Value *, U>::value, U>::type
   get(Value *) {
     return U();
@@ -6952,21 +6993,36 @@
   SmallVector<int> Mask(ShuffleMask.begin()->second);
   auto VMIt = std::next(ShuffleMask.begin());
   T *Prev = nullptr;
-  bool IsBaseNotUndef = !isUndefVector(Base);
+  Value *FirstV = ValueSelect::get<T *>(ShuffleMask.begin()->first);
+  bool IsBaseNotUndef =
+      !(FirstV ? (isa<PoisonValue>(Base) ||
+                  (isUndefVector(Base) && isGuaranteedNotToBePoison(FirstV)))
+               : isUndefVector(Base));
   if (IsBaseNotUndef) {
     // Base is not undef, need to combine it with the next subvectors.
     std::pair<T *, bool> Res = ResizeAction(ShuffleMask.begin()->first, Mask);
-    for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
-      if (Mask[Idx] == UndefMaskElem)
+    bool IsCompleteIdentity = true;
+    for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
+      if (Mask[Idx] == UndefMaskElem) {
         Mask[Idx] = Idx;
-      else
+        IsCompleteIdentity = false;
+      } else {
+        IsCompleteIdentity &= (Res.second || Mask[Idx] == Idx);
         Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
+      }
+    }
+    if (IsCompleteIdentity) {
+      // Found complete identity, i.e. elements only from single vector are
+      // selected in the identity order.
+      Prev = Res.first;
+      std::iota(Mask.begin(), Mask.end(), 0);
+    } else {
+      auto *V = ValueSelect::get<T *>(Base);
+      (void)V;
+      assert((!V || GetVF(V) == Mask.size()) &&
+             "Expected base vector of VF number of elements.");
+      Prev = Action(Mask, {nullptr, Res.first});
     }
-    auto *V = ValueSelect::get<T *>(Base);
-    (void)V;
-    assert((!V || GetVF(V) == Mask.size()) &&
-           "Expected base vector of VF number of elements.");
-    Prev = Action(Mask, {nullptr, Res.first});
   } else if (ShuffleMask.size() == 1) {
     // Base is undef and only 1 vector is shuffled - perform the action only for
     // single vector, if the mask is not the identity mask.
@@ -7773,43 +7829,63 @@
     DenseMap<Value *, unsigned> UniquePositions;
     unsigned NumValues =
         std::distance(VL.begin(), find_if(reverse(VL), [](Value *V) {
-                                    return !isa<UndefValue>(V);
+                                    return !isa<PoisonValue>(V);
                                   }).base());
-    VF = std::max<unsigned>(VF, PowerOf2Ceil(NumValues));
-    int UniqueVals = 0;
+    VF = std::min<unsigned>(VF, PowerOf2Ceil(NumValues));
+    bool IsIdentity = true;
+    int UndefIdx = UndefMaskElem;
+    int ReplaceUndefIdx = UndefMaskElem;
     for (Value *V : VL.drop_back(VL.size() - VF)) {
-      if (isa<UndefValue>(V)) {
+      if (isa<PoisonValue>(V)) {
         ReuseShuffleIndicies.emplace_back(UndefMaskElem);
         continue;
       }
       if (isConstant(V)) {
-        ReuseShuffleIndicies.emplace_back(UniqueValues.size());
-        UniqueValues.emplace_back(V);
+        if (isa<UndefValue>(V)) {
+          if (UndefIdx == UndefMaskElem) {
+            UndefIdx = UniqueValues.size();
+            UniqueValues.emplace_back(V);
+          }
+          ReuseShuffleIndicies.emplace_back(UndefIdx);
+        } else {
+          ReuseShuffleIndicies.emplace_back(UniqueValues.size());
+          UniqueValues.emplace_back(V);
+        }
         continue;
       }
       auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
       ReuseShuffleIndicies.emplace_back(Res.first->second);
       if (Res.second) {
         UniqueValues.emplace_back(V);
-        ++UniqueVals;
-      }
-    }
-    if (UniqueVals == 1 && UniqueValues.size() == 1) {
-      // Emit pure splat vector.
-      ReuseShuffleIndicies.append(VF - ReuseShuffleIndicies.size(),
-                                  UndefMaskElem);
-    } else if (UniqueValues.size() >= VF - 1 || UniqueValues.size() <= 1) {
-      if (UniqueValues.empty()) {
-        assert(all_of(VL, UndefValue::classof) && "Expected list of undefs.");
-        NumValues = VF;
-      }
+        if (ReplaceUndefIdx == UndefMaskElem)
+          ReplaceUndefIdx = Res.first->second;
+      } else {
+        IsIdentity = false;
+      }
+    }
+    // Check if undef values can be safely replaced by some non-const vals.
+    if (UndefIdx != UndefMaskElem && ReplaceUndefIdx != UndefMaskElem &&
+        !IsIdentity) {
+      int MinIdx = std::min(ReplaceUndefIdx, UndefIdx);
+      for (unsigned I = 0; I < VF; ++I)
+        if (ReuseShuffleIndicies[I] == UndefIdx ||
+            ReuseShuffleIndicies[I] == ReplaceUndefIdx)
+          ReuseShuffleIndicies[I] = MinIdx;
+      if (MinIdx != ReplaceUndefIdx) {
+        std::swap(UniqueValues[ReplaceUndefIdx], UniqueValues[UndefIdx]);
+        UndefIdx = ReplaceUndefIdx;
+      }
+      UniqueValues.erase(std::next(UniqueValues.begin(), UndefIdx));
+    }
+    if (!IsIdentity) {
+      UniqueValues.append((UniqueValues.size() == 1 ? VL.size() : VF) -
+                              UniqueValues.size(),
+                          PoisonValue::get(VL[0]->getType()));
+      ReuseShuffleIndicies.append(VL.size() - VF, UndefMaskElem);
+      VL = UniqueValues;
+    } else {
       ReuseShuffleIndicies.clear();
-      UniqueValues.clear();
-      UniqueValues.append(VL.begin(), std::next(VL.begin(), NumValues));
     }
-    UniqueValues.append(VF - UniqueValues.size(),
-                        PoisonValue::get(VL[0]->getType()));
-    VL = UniqueValues;
   }
 
   ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
@@ -7985,11 +8061,12 @@
         }
       }
 
-      if ((!IsIdentity || Offset != 0 ||
-           !isUndefVector(FirstInsert->getOperand(0))) &&
+      bool IsUndefFirstOp = isUndefVector(FirstInsert->getOperand(0));
+      if ((!IsIdentity || Offset != 0 || !IsUndefFirstOp) &&
           NumElts != NumScalars) {
-        SmallVector<int> InsertMask(NumElts);
-        std::iota(InsertMask.begin(), InsertMask.end(), 0);
+        SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+        if (!IsUndefFirstOp)
+          std::iota(InsertMask.begin(), InsertMask.end(), 0);
         for (unsigned I = 0; I < NumElts; I++) {
           if (Mask[I] != UndefMaskElem)
             InsertMask[Offset + I] = NumElts + I;
@@ -8849,6 +8926,8 @@
                             PoisonValue::get(IE->getOperand(0)->getType()));
       IE->replaceUsesOfWith(IE->getOperand(1),
                             PoisonValue::get(IE->getOperand(1)->getType()));
+      if (IE == FirstInsert && dominatesAllUses(LastInsert, FirstInsert))
+        FirstInsert->replaceAllUsesWith(NewInst);
       eraseInstruction(IE);
     }
     CSEBlocks.insert(LastInsert->getParent());
@@ -8960,25 +9039,91 @@
       return I1->isIdenticalTo(I2);
     if (SI1->isIdenticalTo(SI2))
       return true;
-    for (int I = 0, E = SI1->getNumOperands(); I < E; ++I)
-      if (SI1->getOperand(I) != SI2->getOperand(I))
+    for (int I = 0, E = SI1->getNumOperands(); I < E; ++I) {
+      Value *SIOp = SI1->getOperand(I);
+      if (isa<UndefValue>(SIOp) &&
+          SIOp->getType() == SI2->getOperand(I)->getType())
+        continue;
+      if (!any_of(SI2->operands(), [SIOp](Value *Op) { return Op == SIOp; }) ||
+          any_of(SI2->operands(), [SI1](Value *Op) { return Op == SI1; }))
         return false;
+    }
     // Check if the second instruction is more defined than the first one.
     NewMask.assign(SI2->getShuffleMask().begin(), SI2->getShuffleMask().end());
     ArrayRef<int> SM1 = SI1->getShuffleMask();
     // Count trailing undefs in the mask to check the final number of used
     // registers.
+    auto &&IsStrictUndef = [](Value *V) {
+      return isa_and_nonnull<UndefValue>(V) && !isa<PoisonValue>(V);
+    };
     unsigned LastUndefsCnt = 0;
+    unsigned VF = cast<VectorType>(SI1->getOperand(0)->getType())
+                      ->getElementCount()
+                      .getKnownMinValue();
+    // Tries to fetch the vector element (scalar value) for \p VecOp using given
+    // index \p Idx. \p RootOp is used to stop lookup through shuffles, if \p
+    // RootOp is a root of the \p VecOp shuffle.
+    auto &&GetVecElem = [VF](Value *&VecOp, int Idx, Value *RootOp) -> Value * {
+      unsigned EVF = VF;
+      while (auto *SVOp = dyn_cast<ShuffleVectorInst>(VecOp)) {
+        VecOp = SVOp->getOperand(Idx / EVF);
+        Idx = SVOp->getMaskValue(Idx % EVF);
+        auto *FTy = dyn_cast<FixedVectorType>(VecOp->getType());
+        if (!FTy)
+          break;
+        EVF = FTy->getElementCount().getKnownMinValue();
+        if (Idx == UndefMaskElem)
+          return PoisonValue::get(
+              cast<VectorType>(VecOp->getType())->getElementType());
+        if (VecOp == RootOp)
+          break;
+      }
+      if (isa<PoisonValue>(VecOp))
+        return PoisonValue::get(
+            cast<VectorType>(VecOp->getType())->getElementType());
+      if (isa<UndefValue>(VecOp))
+        return UndefValue::get(
+            cast<VectorType>(VecOp->getType())->getElementType());
+      if (auto *CV1 = dyn_cast<ConstantVector>(VecOp))
+        return CV1->getAggregateElement(Idx % EVF);
+      return nullptr;
+    };
     for (int I = 0, E = NewMask.size(); I < E; ++I) {
       if (SM1[I] == UndefMaskElem)
         ++LastUndefsCnt;
       else
         LastUndefsCnt = 0;
-      if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem &&
-          NewMask[I] != SM1[I])
-        return false;
-      if (NewMask[I] == UndefMaskElem)
+      if (NewMask[I] == UndefMaskElem) {
         NewMask[I] = SM1[I];
+        continue;
+      }
+      Value *VecOp1 = SI2->getOperand(NewMask[I] / VF);
+      unsigned VecOp2Pos = SM1[I] / VF;
+      Value *VecOp2 =
+          SM1[I] == UndefMaskElem ? nullptr : SI1->getOperand(VecOp2Pos);
+      if (SM1[I] == UndefMaskElem || (NewMask[I] == SM1[I] && VecOp1 == VecOp2))
+        continue;
+      // Check if one mask can be safely replaced by another.
+      Value *CV1Elem = GetVecElem(VecOp1, NewMask[I], VecOp2);
+      Value *CV2Elem = GetVecElem(VecOp2, SM1[I], VecOp1);
+      if (isa_and_nonnull<PoisonValue>(CV1Elem) ||
+          (IsStrictUndef(CV1Elem) &&
+           (IsStrictUndef(CV2Elem) ||
+            (CV2Elem && isGuaranteedNotToBePoison(CV2Elem)) ||
+            (!CV2Elem && isGuaranteedNotToBePoison(VecOp2))))) {
+        NewMask[I] = SM1[I] % VF + VecOp2Pos * VF;
+        continue;
+      }
+      if ((CV1Elem && CV1Elem == CV2Elem) ||
+          isa_and_nonnull<PoisonValue>(CV2Elem) ||
+          (IsStrictUndef(CV2Elem) &&
+           (IsStrictUndef(CV1Elem) ||
+            (CV1Elem && isGuaranteedNotToBePoison(CV1Elem)) ||
+            (!CV1Elem && isGuaranteedNotToBePoison(VecOp1))))) {
+        NewMask[I] = NewMask[I] % VF + VecOp2Pos * VF;
+        continue;
+      }
+      return false;
     }
     // Check if the last undefs actually change the final number of used vector
     // registers.
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/tsc-s116.ll
@@ -19,21 +19,19 @@
 ; CHECK-LABEL: @s116_modified(
 ; CHECK-NEXT:    [[GEP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 0
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, float* [[A]], i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 2
-; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 4
-; CHECK-NEXT:    [[LD1:%.*]] = load float, float* [[GEP1]], align 4
+; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 3
 ; CHECK-NEXT:    [[LD0:%.*]] = load float, float* [[GEP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP2]] to <2 x float>*
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[GEP1]] to <2 x float>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x float>, <2 x float>* [[TMP1]], align 4
-; CHECK-NEXT:    [[LD4:%.*]] = load float, float* [[GEP4]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 4, i32 5, i32 3>
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x float> [[TMP5]], float [[LD4]], i32 3
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x float> poison, float [[LD1]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[LD1]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <4 x float> [[TMP6]], [[TMP9]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[GEP3]] to <2 x float>*
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> poison, float [[LD0]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP7]], <4 x float> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 2>
+; CHECK-NEXT:    [[TMP10:%.*]] = fmul fast <4 x float> [[TMP9]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[GEP0]] to <4 x float>*
 ; CHECK-NEXT:    store <4 x float> [[TMP10]], <4 x float>* [[TMP11]], align 4
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll
@@ -288,15 +288,15 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[V1_LANE_3]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> poison, double [[V2_LANE_2]], i32 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x double> [[TMP4]], double [[V2_LANE_1]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_2]], i32 2
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x double> [[TMP6]], double [[V2_LANE_0]], i32 3
-; CHECK-NEXT:    [[TMP8:%.*]] = fmul <4 x double> [[TMP3]], [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP8]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x double> [[TMP5]], double [[V2_LANE_0]], i32 2
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP6]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 2>
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x double> [[TMP3]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_0]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_1]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_2]])
 ; CHECK-NEXT:    call void @use(double [[V1_LANE_3]])
-; CHECK-NEXT:    store <9 x double> [[TMP9]], <9 x double>* [[PTR_1]], align 8
+; CHECK-NEXT:    store <9 x double> [[TMP8]], <9 x double>* [[PTR_1]], align 8
 ; CHECK-NEXT:    ret void
 ;
 bb:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865-inseltpoison.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[TMP6]], <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR35865.ll
@@ -11,7 +11,7 @@
 ; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x half> [[TMP3]] to <2 x float>
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
+; CHECK-NEXT:    [[VECINS_I_5_I1:%.*]] = shufflevector <8 x i32> undef, <8 x i32> [[TMP6]], <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 undef, i32 undef>
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cmp-swapped-pred.ll
@@ -7,15 +7,15 @@
 ; CHECK-NEXT:    [[CALL:%.*]] = load i16, i16* undef, align 2
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i16> <i16 poison, i16 0, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 0>, i16 [[CALL37:%.*]], i32 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[CALL]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 poison, i16 0>, i16 [[CALL37]], i32 3
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[CALL37]], i32 6
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <8 x i16> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt <8 x i16> [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 14, i32 7>
-; CHECK-NEXT:    [[TMP7:%.*]] = zext <8 x i1> [[TMP6]] to <8 x i16>
-; CHECK-NEXT:    [[TMP8:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP7]])
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = add i16 [[TMP8]], 0
-; CHECK-NEXT:    ret i16 [[OP_EXTRA]]
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 0, i16 poison>, i16 [[CALL37]], i32 3
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 3, i32 6>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt <8 x i16> [[TMP1]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt <8 x i16> [[TMP1]], [[SHUFFLE]]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <8 x i1> [[TMP3]], <8 x i1> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 11, i32 12, i32 5, i32 14, i32 7>
+; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT:    [[TMP7:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP6]])
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i16 [[TMP7]], 0
+; CHECK-NEXT:    ret i16 [[OP_RDX]]
 ;
 entry:
   %call = load i16, i16* undef, align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/broadcast_long.ll
@@ -18,7 +18,7 @@
 ; CHECK-NEXT:    [[A0:%.*]] = load i32, i32* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds i32, i32* [[S:%.*]], i64 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <8 x i32> poison, i32 [[A0]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP0]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[IDXS0]] to <8 x i32>*
 ; CHECK-NEXT:    store <8 x i32> [[SHUFFLE]], <8 x i32>* [[TMP1]], align 8
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-same-lane-insert.ll
@@ -10,9 +10,10 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> undef, <2 x float> [[TMP3]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> zeroinitializer, float 0.000000e+00, i64 0
 ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> [[TMP2]], <2 x i32> <i32 3, i32 1>
 ; CHECK-NEXT:    store <2 x float> zeroinitializer, ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -43,10 +44,9 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt float [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT:    store <2 x float> [[TMP3]], ptr null, align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    store <2 x float> [[TMP8]], ptr null, align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP2]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    store <2 x float> [[TMP9]], ptr null, align 4
 ; CHECK-NEXT:    ret void
 ;
   %1 = getelementptr inbounds float, ptr undef, i32 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp-as-alternate-ops.ll
@@ -53,11 +53,9 @@
 ; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> zeroinitializer
 ; CHECK-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP5]], zeroinitializer
 ; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP7]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x float> zeroinitializer, <2 x float> [[TMP9]], <2 x i32> <i32 2, i32 3>
-; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP8]], 0
-; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP10]], 1
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x float> [[TMP6]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } zeroinitializer, <2 x float> [[TMP7]], 0
+; CHECK-NEXT:    [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[TMP8]], 1
 ; CHECK-NEXT:    ret { <2 x float>, <2 x float> } zeroinitializer
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/commutativity.ll
@@ -97,9 +97,9 @@
 ; AVX-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHUFFLE]], [[SHUFFLE1]]
 ; AVX-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[B:%.*]], i32 1
 ; AVX-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> [[TMP4]], i32 [[C]], i32 2
-; AVX-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[A]], i32 3
-; AVX-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP3]], [[TMP6]]
-; AVX-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
+; AVX-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
+; AVX-NEXT:    [[TMP6:%.*]] = xor <4 x i32> [[TMP3]], [[SHUFFLE2]]
+; AVX-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16
 ; AVX-NEXT:    ret void
 ;
   %add1 = add i32 %c, %a
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_7zip.ll
@@ -13,19 +13,20 @@
 ; CHECK-NEXT:    [[RANGE20_I:%.*]] = getelementptr inbounds [[STRUCT_CLZMADEC_1_28_55_82_103_124_145_166_181_196_229_259_334:%.*]], %struct.CLzmaDec.1.28.55.82.103.124.145.166.181.196.229.259.334* [[P:%.*]], i64 0, i32 4
 ; CHECK-NEXT:    br label [[DO_BODY66_I:%.*]]
 ; CHECK:       do.body66.i:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ [[TMP4:%.*]], [[DO_COND_I:%.*]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i32> <i32 undef, i32 poison>, <2 x i32> [[TMP1]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    br i1 undef, label [[DO_COND_I]], label [[IF_ELSE_I:%.*]]
 ; CHECK:       if.else.i:
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> [[TMP1]], undef
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <2 x i32> [[TMP1]], undef
 ; CHECK-NEXT:    br label [[DO_COND_I]]
 ; CHECK:       do.cond.i:
-; CHECK-NEXT:    [[TMP3]] = phi <2 x i32> [ [[TMP2]], [[IF_ELSE_I]] ], [ [[TMP1]], [[DO_BODY66_I]] ]
+; CHECK-NEXT:    [[TMP4]] = phi <2 x i32> [ [[TMP3]], [[IF_ELSE_I]] ], [ [[TMP2]], [[DO_BODY66_I]] ]
 ; CHECK-NEXT:    br i1 undef, label [[DO_BODY66_I]], label [[DO_END1006_I:%.*]]
 ; CHECK:       do.end1006.i:
-; CHECK-NEXT:    [[TMP4:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
-; CHECK-NEXT:    store <2 x i32> [[TMP4]], <2 x i32>* [[TMP5]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = select <2 x i1> undef, <2 x i32> undef, <2 x i32> [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[RANGE20_I]] to <2 x i32>*
+; CHECK-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_lencod.ll
@@ -129,7 +129,7 @@
 ; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds double, double* [[INBUF:%.*]], i64 1
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[INBUF]] to <2 x double>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> [[TMP1]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x double> <double poison, double undef>, <2 x double> [[TMP1]], <2 x i32> <i32 3, i32 1>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast double* [[ARRAYIDX44]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP3]], <2 x double>* [[TMP4]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -20,15 +20,16 @@
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV266:%.*]] = phi i64 [ 0, [[BB1]] ], [ [[INDVARS_IV_NEXT267:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP6:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP4]])
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x double> [ [[TMP3]], [[BB1]] ], [ [[TMP7:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 2, i32 1>
+; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
-; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> <i32 1, i32 undef>
-; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP5]])
+; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <2 x double> undef, <2 x double> [[TMP4]], <2 x i32> <i32 3, i32 1>
+; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[TMP6]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
 ; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT:    [[TMP6]] = fadd <2 x double> [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[TMP7]] = fadd <2 x double> [[TMP2]], [[TMP4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT267]], 256
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN:%.*]], label [[FOR_BODY]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract-scalar-from-undef.ll
@@ -6,17 +6,17 @@
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[TMP7:%.*]], i32 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <8 x i32> [[TMP2]], i32 undef, i32 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP3]], <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 2, i32 3, i32 undef, i32 4, i32 undef>
-; CHECK-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[SHUFFLE]]
-; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP5]], <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP7:%.*]] = add <8 x i32> zeroinitializer, [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <8 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP8]])
-; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer)
-; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 2, i32 3, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> <i32 poison, i32 poison, i32 undef, i32 poison, i32 poison, i32 undef, i32 poison, i32 undef>, <8 x i32> [[TMP2]], <8 x i32> <i32 8, i32 9, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <8 x i32> [[TMP3]], i32 undef, i32 6
+; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, [[TMP4]]
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> [[TMP6]], <8 x i32> <i32 0, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP8:%.*]] = add <8 x i32> zeroinitializer, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <8 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP9]])
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[TMP64:%.*]] = zext i32 [[OP_RDX]] to i64
 ; CHECK-NEXT:    ret i64 [[TMP64]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector-inseltpoison.ll
@@ -305,7 +305,7 @@
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> poison, <4 x float> [[TMP18]], <4 x i32> <i32 undef, i32 undef, i32 4, i32 5>
 ; CHECK-NEXT:    ret <4 x float> [[RD1]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-element-build-vector.ll
@@ -44,7 +44,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 undef, i32 3>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 6, i32 15>
+; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 undef, i32 10, i32 undef, i32 12, i32 undef, i32 undef, i32 15>
 ; CHECK-NEXT:    ret <8 x float> [[RD1]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
@@ -340,7 +340,7 @@
 ; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP11]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> <i32 undef, i32 undef, i32 4, i32 5>
 ; CHECK-NEXT:    ret <4 x float> [[RD1]]
 ;
   %c0 = extractelement <4 x i32> %c, i32 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insert-shuffle.ll
@@ -12,10 +12,9 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast float* [[X]] to <2 x float>*
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 16
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <4 x i32> <i32 1, i32 0, i32 0, i32 1>
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 1
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> poison, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
-; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[SHUFFLE1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> <float poison, float undef, float poison, float undef>, float [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x float> [[TMP4]], float [[TMP1]], i32 2
+; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x float> [[SHUFFLE]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = fadd <4 x float> [[TMP6]], undef
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <4 x float> [[TMP7]], undef
 ; CHECK-NEXT:    [[TMP9:%.*]] = fadd <4 x float> [[TMP8]], undef
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/jumbled-load-multiuse.ll
@@ -9,10 +9,11 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([4 x i32]* @b to <4 x i32>*), align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[TMP0]], zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 ptrtoint (i32 ()* @fn1 to i32)>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
-; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> <i32 8, i32 poison, i32 ptrtoint (i32 ()* @fn1 to i32), i32 poison>, <4 x i32> [[TMP0]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SHUFFLE]], <4 x i32> <i32 0, i32 6, i32 0, i32 0>
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
+; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* bitcast ([4 x i32]* @a to <4 x i32>*), align 4
 ; CHECK-NEXT:    ret i32 0
 ;
   entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -10,7 +10,7 @@
 ; CHECK:       bb2.loopexit:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ [[SHUFFLE:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ [[TMP10:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP5:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
@@ -34,20 +34,20 @@
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[SHUFFLE1:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ [[SHUFFLE:%.*]], [[BB10]] ], [ [[TMP12:%.*]], [[BB12]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[SHUFFLE]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> poison, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP10]] = shufflevector <4 x i32> poison, <4 x i32> [[TMP9]], <4 x i32> <i32 undef, i32 undef, i32 4, i32 5>
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP10:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
 ; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    cleanup
-; CHECK-NEXT:    [[SHUFFLE1]] = shufflevector <2 x i32> [[TMP10]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT:    [[SHUFFLE]] = shufflevector <2 x i32> [[TMP11]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb12:
-; CHECK-NEXT:    [[TMP11]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ]
+; CHECK-NEXT:    [[TMP12]] = phi <2 x i32> [ [[TMP6]], [[BB7]] ]
 ; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    br label [[BB9]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/load-partial-vector-shuffle.ll
@@ -38,10 +38,11 @@
 ; AVX-NEXT:    [[T2:%.*]] = load i16, ptr [[ARRAYIDX2]], align 2
 ; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i16>, ptr [[ARRAYIDX3]], align 2
 ; AVX-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; AVX-NEXT:    [[VECINIT2_I_I2:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> undef, <8 x i16> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; AVX-NEXT:    [[VECINIT2_I_I2:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; AVX-NEXT:    [[VECINIT3_I_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I_I2]], i16 [[T2]], i64 3
-; AVX-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; AVX-NEXT:    [[VECINIT7_I_I1:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
+; AVX-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; AVX-NEXT:    [[VECINIT7_I_I1:%.*]] = shufflevector <8 x i16> [[VECINIT3_I_I]], <8 x i16> [[TMP5]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
 ; AVX-NEXT:    [[T7:%.*]] = bitcast <8 x i16> [[VECINIT7_I_I1]] to <2 x i64>
 ; AVX-NEXT:    ret <2 x i64> [[T7]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll
@@ -15,13 +15,13 @@
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[ADD78_1]], i32 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 [[SUB86_1]], i32 3
 ; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 [[ADD78_2]], i32 4
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i32> [[TMP4]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 3, i32 4, i32 0, i32 5, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
 ; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <16 x i32> poison, i32 [[SUB86_1]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 [[ADD78_1]], i32 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[SUB102_1]], i32 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB102_3]], i32 4
-; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 4>
+; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 3, i32 4, i32 0, i32 0, i32 0, i32 0, i32 5, i32 0, i32 0, i32 5>
 ; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = sub nsw <16 x i32> [[SHUFFLE]], [[SHUFFLE1]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <16 x i32> [[TMP10]], <16 x i32> [[TMP11]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/partail.ll
@@ -17,23 +17,24 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[SUB14]], i32 1
 ; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt <4 x i32> [[SHUFFLE]], <i32 0, i32 -1, i32 -5, i32 -9>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP0]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp slt <4 x i32> [[TMP3]], undef
-; CHECK-NEXT:    [[TMP5:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP3]], <4 x i32> undef
-; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i32> [[TMP5]] to <4 x i64>
-; CHECK-NEXT:    [[TMP7:%.*]] = trunc <4 x i64> [[TMP6]] to <4 x i32>
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i32> [[TMP7]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i32> [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP11]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2
-; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP13]]
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
-; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
-; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i32> <i32 poison, i32 undef, i32 undef, i32 undef>, i32 [[SHR15]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt <4 x i32> [[TMP4]], undef
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP4]], <4 x i32> undef
+; CHECK-NEXT:    [[TMP7:%.*]] = sext <4 x i32> [[TMP6]] to <4 x i64>
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i64> [[TMP7]] to <4 x i32>
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31_1:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31_2:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
+; CHECK-NEXT:    [[TMP16:%.*]] = sext i32 [[TMP15]] to i64
+; CHECK-NEXT:    [[ARRAYIDX31_3:%.*]] = getelementptr inbounds i16*, i16** undef, i64 [[TMP16]]
 ; CHECK-NEXT:    unreachable
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/phi-undef-input.ll
@@ -14,7 +14,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 undef, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -51,7 +51,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 undef, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -88,7 +88,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 0, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -126,7 +126,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -164,7 +164,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG3:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 poison, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
@@ -201,7 +201,7 @@
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i8> [[TMP2]], i8 [[ARG2:%.*]], i32 3
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 poison>, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <4 x i8> [ [[TMP3]], [[BB2]] ], [ <i8 0, i8 0, i8 poison, i8 undef>, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
 ; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP5]])
 ; CHECK-NEXT:    ret i32 [[TMP6]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll
@@ -9,7 +9,7 @@
 ; CHECK-NEXT:    br label [[T:%.*]]
 ; CHECK:       t:
 ; CHECK-NEXT:    [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> <i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[LD]], <8 x i16> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP0:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[P0]] to <8 x i16>*
 ; CHECK-NEXT:    store <8 x i16> [[TMP0]], <8 x i16>* [[TMP1]], align 2
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-undefs.ll
@@ -6,12 +6,12 @@
 ; CHECK-NEXT:  for.cond.preheader:
 ; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[FOR_INC_PREHEADER:%.*]]
 ; CHECK:       for.inc.preheader:
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison>, i32 [[TMP0:%.*]], i32 6
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 undef>, i32 [[TMP0:%.*]], i32 6
 ; CHECK-NEXT:    br i1 false, label [[FOR_END]], label [[L1_PREHEADER:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[DOTPR:%.*]] = phi i32 [ 0, [[FOR_INC_PREHEADER]] ], [ 0, [[FOR_COND_PREHEADER:%.*]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[DOTPR]], i32 0
-; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0>
+; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[L1_PREHEADER]]
 ; CHECK:       L1.preheader:
 ; CHECK-NEXT:    [[TMP3:%.*]] = phi <8 x i32> [ [[SHUFFLE]], [[FOR_END]] ], [ [[TMP1]], [[FOR_INC_PREHEADER]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vectorize-widest-phis.ll
@@ -8,11 +8,11 @@
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub float 6.553500e+04, undef
 ; CHECK-NEXT:    br label [[BB1:%.*]]
 ; CHECK:       bb1:
-; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> poison, float [[SUB]], i32 0
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> <float poison, float poison, float undef, float undef>, float [[SUB]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> [[TMP0]], float [[CONV]], i32 1
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP14:%.*]], [[BB3:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <4 x float> [ [[TMP1]], [[BB1]] ], [ [[TMP15:%.*]], [[BB3:%.*]] ]
 ; CHECK-NEXT:    [[TMP3:%.*]] = load double, double* undef, align 8
 ; CHECK-NEXT:    br i1 undef, label [[BB3]], label [[BB4:%.*]]
 ; CHECK:       bb4:
@@ -24,12 +24,13 @@
 ; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x double> [[TMP5]], [[TMP6]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> [[TMP8]], <2 x i32> <i32 0, i32 3>
 ; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <2 x double> [[TMP9]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP11:%.*]] = fcmp ogt <4 x double> [[TMP10]], [[TMP4]]
-; CHECK-NEXT:    [[TMP12:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
-; CHECK-NEXT:    [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x float> [[TMP2]], <4 x float> [[TMP12]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> <double poison, double poison, double undef, double undef>, <4 x double> [[TMP10]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
+; CHECK-NEXT:    [[TMP12:%.*]] = fcmp ogt <4 x double> [[TMP11]], [[TMP4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = fptrunc <4 x double> [[TMP10]] to <4 x float>
+; CHECK-NEXT:    [[TMP14:%.*]] = select <4 x i1> [[TMP12]], <4 x float> [[TMP2]], <4 x float> [[TMP13]]
 ; CHECK-NEXT:    br label [[BB3]]
 ; CHECK:       bb3:
-; CHECK-NEXT:    [[TMP14]] = phi <4 x float> [ [[TMP13]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
+; CHECK-NEXT:    [[TMP15]] = phi <4 x float> [ [[TMP14]], [[BB4]] ], [ [[TMP2]], [[BB2]] ]
 ; CHECK-NEXT:    br label [[BB2]]
 ;
 entry: