diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14321,78 +14321,42 @@
 
 template <typename T>
 static bool tryToVectorizeSequence(
-    SmallVectorImpl<T *> &Incoming, function_ref<bool(T *, T *)> Comparator,
+    SmallVectorImpl<T *> &UnsortedSeeds,
+    function_ref<bool(T *, T *)> Comparator,
     function_ref<bool(T *, T *)> AreCompatible,
-    function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
-    bool MaxVFOnly, BoUpSLP &R) {
+    function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper, BoUpSLP &R) {
+  // Group the seeds by getType(); MapVector iterates in insertion order.
+  MapVector<Type *, SmallVector<T *>> SeedsMap;
+  for (T *Seed : UnsortedSeeds)
+    SeedsMap[Seed->getType()].push_back(Seed);
+  // Stable-sort each type's seeds with Comparator (ties keep input order).
+  for (auto &Pair : SeedsMap)
+    stable_sort(Pair.second, Comparator);
+  // For each type, try to vectorize that type's sorted seed vector.
   bool Changed = false;
-  // Sort by type, parent, operands.
-  stable_sort(Incoming, Comparator);
-
-  // Try to vectorize elements base on their type.
-  SmallVector<T *> Candidates;
-  for (auto *IncIt = Incoming.begin(), *E = Incoming.end(); IncIt != E;) {
-    // Look for the next elements with the same type, parent and operand
-    // kinds.
-    auto *SameTypeIt = IncIt;
-    while (SameTypeIt != E && AreCompatible(*SameTypeIt, *IncIt))
-      ++SameTypeIt;
-
-    // Try to vectorize them.
-    unsigned NumElts = (SameTypeIt - IncIt);
-    LLVM_DEBUG(dbgs() << "SLP: Trying to vectorize starting at nodes ("
-                      << NumElts << ")\n");
-    // The vectorization is a 3-state attempt:
-    // 1. Try to vectorize instructions with the same/alternate opcodes with the
-    // size of maximal register at first.
-    // 2. Try to vectorize remaining instructions with the same type, if
-    // possible. This may result in the better vectorization results rather than
-    // if we try just to vectorize instructions with the same/alternate opcodes.
-    // 3. Final attempt to try to vectorize all instructions with the
-    // same/alternate ops only, this may result in some extra final
-    // vectorization.
-    if (NumElts > 1 &&
-        TryToVectorizeHelper(ArrayRef(IncIt, NumElts), MaxVFOnly)) {
-      // Success start over because instructions might have been changed.
+  for (auto &Pair : SeedsMap) {
+    auto &Seeds = Pair.second;
+    if (Seeds.empty())
+      continue;
+    // First offer the whole per-type vector to the helper in one call.
+    if (TryToVectorizeHelper(Seeds, /*MaxVFOnly=*/false)) {
       Changed = true;
-    } else {
-      /// \Returns the minimum number of elements that we will attempt to
-      /// vectorize.
-      auto GetMinNumElements = [&R](Value *V) {
-        unsigned EltSize = R.getVectorElementSize(V);
-        return std::max(2U, R.getMaxVecRegSize() / EltSize);
-      };
-      if (NumElts < GetMinNumElements(*IncIt) &&
-          (Candidates.empty() ||
-           Candidates.front()->getType() == (*IncIt)->getType())) {
-        Candidates.append(IncIt, std::next(IncIt, NumElts));
-      }
+      continue;
     }
-    // Final attempt to vectorize instructions with the same types.
-    if (Candidates.size() > 1 &&
-        (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
-      if (TryToVectorizeHelper(Candidates, /*MaxVFOnly=*/false)) {
-        // Success start over because instructions might have been changed.
+    // If that failed, split Seeds into runs of `AreCompatible()` neighbors.
+    uint32_t SliceSz;
+    for (auto SliceBegin = Seeds.begin(), ItE = Seeds.end(); SliceBegin != ItE;
+         SliceBegin += SliceSz) {
+      auto SliceEnd = std::next(SliceBegin);
+      while (SliceEnd != ItE && AreCompatible(*SliceEnd, *std::prev(SliceEnd)))
+        ++SliceEnd;
+      SliceSz = SliceEnd - SliceBegin;
+      if (SliceSz <= 1)
+        continue;
+      ArrayRef<T *> SeedsSlice(SliceBegin, SliceEnd);
+      if (TryToVectorizeHelper(SeedsSlice, /*MaxVFOnly=*/false))
         Changed = true;
-      } else if (MaxVFOnly) {
-        // Try to vectorize using small vectors.
-        for (auto *It = Candidates.begin(), *End = Candidates.end();
-             It != End;) {
-          auto *SameTypeIt = It;
-          while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
-            ++SameTypeIt;
-          unsigned NumElts = (SameTypeIt - It);
-          if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(It, NumElts),
-                                                  /*MaxVFOnly=*/false))
-            Changed = true;
-          It = SameTypeIt;
-        }
-      }
-      Candidates.clear();
     }
-
-    // Start over at the next instruction of a different type (or the end).
-    IncIt = SameTypeIt;
   }
   return Changed;
 }
@@ -14498,7 +14462,7 @@
           return false;
         return tryToVectorizeList(Candidates, R, MaxVFOnly);
       },
-      /*MaxVFOnly=*/true, R);
+      R);
   return Changed;
 }
 
@@ -14671,7 +14635,7 @@
         [this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
           return tryToVectorizeList(Candidates, R, MaxVFOnly);
         },
-        /*MaxVFOnly=*/true, R);
+        R);
     Changed |= HaveVectorizedPhiNodes;
     VisitedInstrs.insert(Incoming.begin(), Incoming.end());
   } while (HaveVectorizedPhiNodes);
@@ -14984,7 +14948,7 @@
         [this, &R](ArrayRef<StoreInst *> Candidates, bool) {
           return vectorizeStores(Candidates, R);
         },
-        /*MaxVFOnly=*/false, R);
+        R);
   }
   return Changed;
 }
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/extractelements-to-shuffle.ll
@@ -3,27 +3,22 @@
 
 define void @test(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP1:%.*]], i64 0
-; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP4]], 0
-; CHECK-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP0:%.*]], i64 0
-; CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP7]], 0
-; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP2:%.*]], i64 0
-; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
-; CHECK-NEXT:    [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[TMP12]] to i32
-; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP0]], i64 0
-; CHECK-NEXT:    [[TMP15:%.*]] = or i64 [[TMP14]], 0
-; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[TMP15]] to i32
-; CHECK-NEXT:    br label [[TMP17:%.*]]
-; CHECK:       17:
-; CHECK-NEXT:    [[TMP18:%.*]] = phi i32 [ [[TMP22:%.*]], [[TMP17]] ], [ [[TMP6]], [[TMP3:%.*]] ]
-; CHECK-NEXT:    [[TMP19:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP9]], [[TMP3]] ]
-; CHECK-NEXT:    [[TMP20:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP13]], [[TMP3]] ]
-; CHECK-NEXT:    [[TMP21:%.*]] = phi i32 [ 0, [[TMP17]] ], [ [[TMP16]], [[TMP3]] ]
-; CHECK-NEXT:    [[TMP22]] = or i32 [[TMP18]], 0
-; CHECK-NEXT:    br label [[TMP17]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i64> [[TMP1:%.*]], <2 x i64> [[TMP0:%.*]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP5:%.*]] = or <2 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = trunc <2 x i64> [[TMP5]] to <2 x i32>
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i64> [[TMP2:%.*]], <2 x i64> [[TMP0]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> <i64 poison, i64 0>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <2 x i64> [[TMP9]] to <2 x i32>
+; CHECK-NEXT:    br label [[TMP11:%.*]]
+; CHECK:       11:
+; CHECK-NEXT:    [[TMP12:%.*]] = phi <2 x i32> [ [[TMP17:%.*]], [[TMP11]] ], [ [[TMP6]], [[TMP3:%.*]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x i32> [ zeroinitializer, [[TMP11]] ], [ [[TMP10]], [[TMP3]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x i32> [[TMP12]], <2 x i32> <i32 poison, i32 0>, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP15:%.*]] = or <2 x i32> zeroinitializer, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = add <2 x i32> zeroinitializer, [[TMP14]]
+; CHECK-NEXT:    [[TMP17]] = shufflevector <2 x i32> [[TMP15]], <2 x i32> [[TMP16]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    br label [[TMP11]]
 ;
   %4 = extractelement <2 x i64> %1, i64 0
   %5 = or i64 %4, 0
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll
@@ -10,7 +10,7 @@
 ; CHECK:       bb2.loopexit:
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
-; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ [[TMP8:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = phi <4 x i32> [ [[TMP7:%.*]], [[BB9:%.*]] ], [ poison, [[BB2_LOOPEXIT:%.*]] ]
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb3:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi <2 x i32> [ [[TMP3:%.*]], [[BB6:%.*]] ], [ poison, [[BB1:%.*]] ]
@@ -32,19 +32,18 @@
 ; CHECK-NEXT:    br i1 poison, label [[BB7]], label [[BB6]]
 ; CHECK:       bb9:
 ; CHECK-NEXT:    [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP10:%.*]], [[BB12]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 1, i32 0, i32 poison, i32 poison>
-; CHECK-NEXT:    [[TMP8]] = shufflevector <4 x i32> [[TMP7]], <4 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
+; CHECK-NEXT:    [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 0>
 ; CHECK-NEXT:    br label [[BB2]]
 ; CHECK:       bb10:
-; CHECK-NEXT:    [[TMP9]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
+; CHECK-NEXT:    [[TMP8]] = phi <2 x i32> [ [[TMP1]], [[BB3]] ]
 ; CHECK-NEXT:    [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    br label [[BB9]]
 ; CHECK:       bb11:
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb12:
-; CHECK-NEXT:    [[TMP10]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
+; CHECK-NEXT:    [[TMP9]] = phi <2 x i32> [ [[TMP4]], [[BB7]] ]
 ; CHECK-NEXT:    [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
 ; CHECK-NEXT:    cleanup
 ; CHECK-NEXT:    br label [[BB9]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-nodes-to-shuffle.ll
@@ -7,46 +7,42 @@
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> poison, i64 [[P0:%.*]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i64> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[A2:%.*]] = add i64 [[P2:%.*]], [[P2]]
-; CHECK-NEXT:    [[A3:%.*]] = add i64 [[P3:%.*]], [[P3]]
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i64> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[M2:%.*]] = mul i64 [[P2]], [[P2]]
-; CHECK-NEXT:    [[M3:%.*]] = mul i64 [[P3]], [[P3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = sdiv <2 x i64> [[TMP1]], [[TMP1]]
-; CHECK-NEXT:    [[D2:%.*]] = sdiv i64 [[P2]], [[P2]]
-; CHECK-NEXT:    [[D3:%.*]] = sdiv i64 [[P3]], [[P3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i32 0
-; CHECK-NEXT:    [[S0:%.*]] = sub i64 [[TMP6]], [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i32 1
-; CHECK-NEXT:    [[S1:%.*]] = sub i64 [[TMP8]], [[TMP7]]
-; CHECK-NEXT:    [[S2:%.*]] = sub i64 [[M2]], [[D2]]
-; CHECK-NEXT:    [[S3:%.*]] = sub i64 [[M3]], [[D3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
-; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[TMP9]], [[S0]]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
-; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[TMP10]], [[S1]]
-; CHECK-NEXT:    [[SHL3:%.*]] = shl i64 [[A2]], [[S2]]
-; CHECK-NEXT:    [[SHL4:%.*]] = shl i64 [[A3]], [[S3]]
-; CHECK-NEXT:    [[O0:%.*]] = or i64 [[TMP9]], [[TMP10]]
+; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <2 x i64> poison, i64 [[P2:%.*]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[P3:%.*]], i32 1
+; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i64> [[TMP4]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[TMP1]], [[TMP1]]
+; CHECK-NEXT:    [[TMP7:%.*]] = mul <2 x i64> [[TMP4]], [[TMP4]]
+; CHECK-NEXT:    [[D0:%.*]] = sdiv i64 [[P0]], [[P0]]
+; CHECK-NEXT:    [[D1:%.*]] = sdiv i64 [[P1]], [[P1]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i64> [[TMP6]], i32 0
+; CHECK-NEXT:    [[S0:%.*]] = sub i64 [[TMP8]], [[D0]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP6]], i32 1
+; CHECK-NEXT:    [[S1:%.*]] = sub i64 [[TMP9]], [[D1]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
+; CHECK-NEXT:    [[SHL1:%.*]] = shl i64 [[TMP10]], [[S0]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
+; CHECK-NEXT:    [[SHL2:%.*]] = shl i64 [[TMP11]], [[S1]]
+; CHECK-NEXT:    [[O0:%.*]] = or i64 [[TMP10]], [[TMP11]]
 ; CHECK-NEXT:    [[TT0:%.*]] = trunc i64 [[O0]] to i32
-; CHECK-NEXT:    [[O1:%.*]] = or i64 [[TMP6]], [[TMP8]]
+; CHECK-NEXT:    [[O1:%.*]] = or i64 [[TMP8]], [[TMP9]]
 ; CHECK-NEXT:    [[TT1:%.*]] = trunc i64 [[O1]] to i32
-; CHECK-NEXT:    [[O2:%.*]] = or i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TT2:%.*]] = trunc i64 [[O2]] to i32
-; CHECK-NEXT:    [[O3:%.*]] = or i64 [[TMP6]], [[TMP8]]
-; CHECK-NEXT:    [[TT3:%.*]] = trunc i64 [[O3]] to i32
+; CHECK-NEXT:    [[TMP12:%.*]] = sdiv <2 x i64> [[TMP4]], [[TMP4]]
+; CHECK-NEXT:    [[TMP13:%.*]] = sub <2 x i64> [[TMP7]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = shl <2 x i64> [[TMP5]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i64> poison, i64 [[D0]], i32 0
+; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <2 x i64> [[TMP15]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 2>
+; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <2 x i64> poison, i64 [[D1]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x i64> [[TMP17]], <2 x i64> [[TMP6]], <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT:    [[TMP19:%.*]] = or <2 x i64> [[TMP16]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = trunc <2 x i64> [[TMP19]] to <2 x i32>
 ; CHECK-NEXT:    br label [[BB:%.*]]
 ; CHECK:       bb:
 ; CHECK-NEXT:    [[PHI0:%.*]] = phi i32 [ [[T1:%.*]], [[BB]] ], [ [[TT0]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[PHI1:%.*]] = phi i32 [ [[T2:%.*]], [[BB]] ], [ [[TT1]], [[ENTRY]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ [[T3:%.*]], [[BB]] ], [ [[TT2]], [[ENTRY]] ]
-; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ [[T4:%.*]], [[BB]] ], [ [[TT3]], [[ENTRY]] ]
+; CHECK-NEXT:    [[TMP21:%.*]] = phi <2 x i32> [ [[TMP22:%.*]], [[BB]] ], [ [[TMP20]], [[ENTRY]] ]
 ; CHECK-NEXT:    [[T1]] = trunc i64 [[SHL1]] to i32
 ; CHECK-NEXT:    [[T2]] = trunc i64 [[SHL2]] to i32
-; CHECK-NEXT:    [[T3]] = trunc i64 [[SHL3]] to i32
-; CHECK-NEXT:    [[T4]] = trunc i64 [[SHL4]] to i32
+; CHECK-NEXT:    [[TMP22]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
 ; CHECK-NEXT:    br label [[BB]]
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/rgb_phi.ll
@@ -23,40 +23,40 @@
 define float @foo(ptr nocapture readonly %A) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A:%.*]], align 4
 ; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[A]], i64 2
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP4:%.*]] = phi float [ [[TMP3]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi float [ [[TMP2]], [[ENTRY:%.*]] ], [ [[DOTPRE:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ]
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[B_032:%.*]] = phi float [ [[TMP2]], [[ENTRY]] ], [ [[ADD14:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[TMP5:%.*]] = phi <2 x float> [ [[TMP1]], [[ENTRY]] ], [ [[TMP11:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = add nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP7]], i32 1
-; CHECK-NEXT:    [[TMP10:%.*]] = fmul <2 x float> [[TMP9]], <float 7.000000e+00, float 8.000000e+00>
-; CHECK-NEXT:    [[TMP11]] = fadd <2 x float> [[TMP5]], [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = add nsw i64 [[INDVARS_IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP13:%.*]] = load float, ptr [[ARRAYIDX12]], align 4
-; CHECK-NEXT:    [[MUL13:%.*]] = fmul float [[TMP13]], 9.000000e+00
+; CHECK-NEXT:    [[B_032:%.*]] = phi float [ [[TMP1]], [[ENTRY]] ], [ [[ADD14:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x float> [ [[TMP0]], [[ENTRY]] ], [ [[TMP10:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = add nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load float, ptr [[ARRAYIDX7]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> poison, float [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[TMP6]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <2 x float> [[TMP8]], <float 7.000000e+00, float 8.000000e+00>
+; CHECK-NEXT:    [[TMP10]] = fadd <2 x float> [[TMP4]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add nsw i64 [[INDVARS_IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = load float, ptr [[ARRAYIDX12]], align 4
+; CHECK-NEXT:    [[MUL13:%.*]] = fmul float [[TMP12]], 9.000000e+00
 ; CHECK-NEXT:    [[ADD14]] = fadd float [[B_032]], [[MUL13]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 3
-; CHECK-NEXT:    [[TMP14:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP14]], 121
+; CHECK-NEXT:    [[TMP13:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP13]], 121
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_FOR_BODY_CRIT_EDGE]], label [[FOR_END:%.*]]
 ; CHECK:       for.body.for.body_crit_edge:
 ; CHECK-NEXT:    [[ARRAYIDX3_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    [[DOTPRE]] = load float, ptr [[ARRAYIDX3_PHI_TRANS_INSERT]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[TMP11]], i32 0
-; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <2 x float> [[TMP11]], i32 1
-; CHECK-NEXT:    [[ADD16:%.*]] = fadd float [[TMP15]], [[TMP16]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <2 x float> [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <2 x float> [[TMP10]], i32 1
+; CHECK-NEXT:    [[ADD16:%.*]] = fadd float [[TMP14]], [[TMP15]]
 ; CHECK-NEXT:    [[ADD17:%.*]] = fadd float [[ADD16]], [[ADD14]]
 ; CHECK-NEXT:    ret float [[ADD17]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
--- a/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/slp-max-phi-size.ll
@@ -137,23 +137,23 @@
 ; MAX256-NEXT:    [[I6:%.*]] = fpext half [[HVAL]] to float
 ; MAX256-NEXT:    [[I9:%.*]] = fpext half [[HVAL]] to float
 ; MAX256-NEXT:    [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
-; MAX256-NEXT:    [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX256-NEXT:    [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
-; MAX256-NEXT:    [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX256-NEXT:    [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
-; MAX256-NEXT:    [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
-; MAX256-NEXT:    [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
-; MAX256-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX256-NEXT:    [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
-; MAX256-NEXT:    [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
-; MAX256-NEXT:    [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
-; MAX256-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX256-NEXT:    [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
+; MAX256-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT:    [[TMP2:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
+; MAX256-NEXT:    [[TMP3:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT:    [[TMP4:%.*]] = fmul <8 x float> [[TMP1]], [[TMP3]]
+; MAX256-NEXT:    [[TMP5:%.*]] = fadd <8 x float> zeroinitializer, [[TMP4]]
+; MAX256-NEXT:    [[TMP6:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
+; MAX256-NEXT:    [[TMP7:%.*]] = shufflevector <8 x float> [[TMP6]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT:    [[TMP8:%.*]] = fmul <8 x float> [[TMP7]], [[TMP3]]
 ; MAX256-NEXT:    [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
-; MAX256-NEXT:    [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
-; MAX256-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX256-NEXT:    [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
-; MAX256-NEXT:    [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
+; MAX256-NEXT:    [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
+; MAX256-NEXT:    [[TMP11:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT:    [[TMP12:%.*]] = fmul <8 x float> [[TMP11]], [[TMP3]]
+; MAX256-NEXT:    [[TMP13:%.*]] = fadd <8 x float> zeroinitializer, [[TMP12]]
+; MAX256-NEXT:    [[TMP14:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
+; MAX256-NEXT:    [[TMP15:%.*]] = shufflevector <8 x float> [[TMP14]], <8 x float> poison, <8 x i32> zeroinitializer
+; MAX256-NEXT:    [[TMP16:%.*]] = fmul <8 x float> [[TMP15]], [[TMP3]]
+; MAX256-NEXT:    [[TMP17:%.*]] = fadd <8 x float> zeroinitializer, [[TMP16]]
 ; MAX256-NEXT:    switch i32 undef, label [[BB5:%.*]] [
 ; MAX256-NEXT:    i32 0, label [[BB2:%.*]]
 ; MAX256-NEXT:    i32 1, label [[BB3:%.*]]
@@ -166,40 +166,28 @@
 ; MAX256:       bb5:
 ; MAX256-NEXT:    br label [[BB2]]
 ; MAX256:       bb2:
-; MAX256-NEXT:    [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
-; MAX256-NEXT:    [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX256-NEXT:    store float [[TMP17]], ptr undef, align 4
+; MAX256-NEXT:    [[TMP18:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP19:%.*]] = phi <8 x float> [ [[TMP13]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP13]], [[BB5]] ], [ [[TMP13]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP20:%.*]] = phi <8 x float> [ [[TMP17]], [[BB3]] ], [ [[TMP17]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[TMP17]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP21:%.*]] = phi <8 x float> [ [[TMP5]], [[BB3]] ], [ [[TMP5]], [[BB4]] ], [ [[TMP5]], [[BB5]] ], [ [[TMP3]], [[BB1]] ]
+; MAX256-NEXT:    [[TMP22:%.*]] = extractelement <8 x float> [[TMP19]], i32 7
+; MAX256-NEXT:    store float [[TMP22]], ptr undef, align 4
 ; MAX256-NEXT:    ret void
 ;
 ; MAX1024-LABEL: @phi_float32(
 ; MAX1024-NEXT:  bb:
 ; MAX1024-NEXT:    br label [[BB1:%.*]]
 ; MAX1024:       bb1:
-; MAX1024-NEXT:    [[I:%.*]] = fpext half [[HVAL:%.*]] to float
-; MAX1024-NEXT:    [[I3:%.*]] = fpext half [[HVAL]] to float
-; MAX1024-NEXT:    [[I6:%.*]] = fpext half [[HVAL]] to float
-; MAX1024-NEXT:    [[I9:%.*]] = fpext half [[HVAL]] to float
-; MAX1024-NEXT:    [[TMP0:%.*]] = insertelement <8 x float> poison, float [[I]], i32 0
-; MAX1024-NEXT:    [[SHUFFLE11:%.*]] = shufflevector <8 x float> [[TMP0]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX1024-NEXT:    [[TMP1:%.*]] = insertelement <8 x float> poison, float [[FVAL:%.*]], i32 0
-; MAX1024-NEXT:    [[SHUFFLE12:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX1024-NEXT:    [[TMP2:%.*]] = fmul <8 x float> [[SHUFFLE11]], [[SHUFFLE12]]
-; MAX1024-NEXT:    [[TMP3:%.*]] = fadd <8 x float> zeroinitializer, [[TMP2]]
-; MAX1024-NEXT:    [[TMP4:%.*]] = insertelement <8 x float> poison, float [[I3]], i32 0
-; MAX1024-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP4]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX1024-NEXT:    [[TMP5:%.*]] = fmul <8 x float> [[SHUFFLE]], [[SHUFFLE12]]
-; MAX1024-NEXT:    [[TMP6:%.*]] = fadd <8 x float> zeroinitializer, [[TMP5]]
-; MAX1024-NEXT:    [[TMP7:%.*]] = insertelement <8 x float> poison, float [[I6]], i32 0
-; MAX1024-NEXT:    [[SHUFFLE5:%.*]] = shufflevector <8 x float> [[TMP7]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX1024-NEXT:    [[TMP8:%.*]] = fmul <8 x float> [[SHUFFLE5]], [[SHUFFLE12]]
-; MAX1024-NEXT:    [[TMP9:%.*]] = fadd <8 x float> zeroinitializer, [[TMP8]]
-; MAX1024-NEXT:    [[TMP10:%.*]] = insertelement <8 x float> poison, float [[I9]], i32 0
-; MAX1024-NEXT:    [[SHUFFLE8:%.*]] = shufflevector <8 x float> [[TMP10]], <8 x float> poison, <8 x i32> zeroinitializer
-; MAX1024-NEXT:    [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE8]], [[SHUFFLE12]]
-; MAX1024-NEXT:    [[TMP12:%.*]] = fadd <8 x float> zeroinitializer, [[TMP11]]
+; MAX1024-NEXT:    [[TMP0:%.*]] = insertelement <4 x half> poison, half [[HVAL:%.*]], i32 0
+; MAX1024-NEXT:    [[TMP1:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> zeroinitializer
+; MAX1024-NEXT:    [[TMP2:%.*]] = fpext <4 x half> [[TMP1]] to <4 x float>
+; MAX1024-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+; MAX1024-NEXT:    [[TMP4:%.*]] = insertelement <32 x float> poison, float [[FVAL:%.*]], i32 0
+; MAX1024-NEXT:    [[TMP5:%.*]] = shufflevector <32 x float> [[TMP4]], <32 x float> poison, <32 x i32> zeroinitializer
+; MAX1024-NEXT:    [[TMP6:%.*]] = fmul <32 x float> [[TMP3]], [[TMP5]]
+; MAX1024-NEXT:    [[TMP7:%.*]] = fadd <32 x float> zeroinitializer, [[TMP6]]
+; MAX1024-NEXT:    [[TMP8:%.*]] = shufflevector <32 x float> [[TMP4]], <32 x float> [[TMP7]], <32 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; MAX1024-NEXT:    [[TMP9:%.*]] = shufflevector <32 x float> [[TMP8]], <32 x float> poison, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
 ; MAX1024-NEXT:    switch i32 undef, label [[BB5:%.*]] [
 ; MAX1024-NEXT:    i32 0, label [[BB2:%.*]]
 ; MAX1024-NEXT:    i32 1, label [[BB3:%.*]]
@@ -208,16 +196,15 @@
 ; MAX1024:       bb3:
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb4:
+; MAX1024-NEXT:    [[TMP10:%.*]] = shufflevector <32 x float> [[TMP8]], <32 x float> [[TMP7]], <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb5:
+; MAX1024-NEXT:    [[TMP11:%.*]] = shufflevector <32 x float> [[TMP8]], <32 x float> [[TMP7]], <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
 ; MAX1024-NEXT:    br label [[BB2]]
 ; MAX1024:       bb2:
-; MAX1024-NEXT:    [[TMP13:%.*]] = phi <8 x float> [ [[TMP6]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
-; MAX1024-NEXT:    [[TMP14:%.*]] = phi <8 x float> [ [[TMP9]], [[BB3]] ], [ [[SHUFFLE12]], [[BB4]] ], [ [[TMP9]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
-; MAX1024-NEXT:    [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], [[BB3]] ], [ [[TMP12]], [[BB4]] ], [ [[SHUFFLE12]], [[BB5]] ], [ [[TMP12]], [[BB1]] ]
-; MAX1024-NEXT:    [[TMP16:%.*]] = phi <8 x float> [ [[TMP3]], [[BB3]] ], [ [[TMP3]], [[BB4]] ], [ [[TMP3]], [[BB5]] ], [ [[SHUFFLE12]], [[BB1]] ]
-; MAX1024-NEXT:    [[TMP17:%.*]] = extractelement <8 x float> [[TMP14]], i32 7
-; MAX1024-NEXT:    store float [[TMP17]], ptr undef, align 4
+; MAX1024-NEXT:    [[TMP12:%.*]] = phi <32 x float> [ [[TMP7]], [[BB3]] ], [ [[TMP10]], [[BB4]] ], [ [[TMP11]], [[BB5]] ], [ [[TMP9]], [[BB1]] ]
+; MAX1024-NEXT:    [[TMP13:%.*]] = extractelement <32 x float> [[TMP12]], i32 15
+; MAX1024-NEXT:    store float [[TMP13]], ptr undef, align 4
 ; MAX1024-NEXT:    ret void
 ;
 bb: