diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h --- a/llvm/include/llvm/Analysis/IVDescriptors.h +++ b/llvm/include/llvm/Analysis/IVDescriptors.h @@ -33,25 +33,31 @@ /// These are the kinds of recurrences that we support. enum class RecurKind { - None, ///< Not a recurrence. - Add, ///< Sum of integers. - Mul, ///< Product of integers. - Or, ///< Bitwise or logical OR of integers. - And, ///< Bitwise or logical AND of integers. - Xor, ///< Bitwise or logical XOR of integers. - SMin, ///< Signed integer min implemented in terms of select(cmp()). - SMax, ///< Signed integer max implemented in terms of select(cmp()). - UMin, ///< Unisgned integer min implemented in terms of select(cmp()). - UMax, ///< Unsigned integer max implemented in terms of select(cmp()). - FAdd, ///< Sum of floats. - FMul, ///< Product of floats. - FMin, ///< FP min implemented in terms of select(cmp()). - FMax, ///< FP max implemented in terms of select(cmp()). - FMulAdd, ///< Fused multiply-add of floats (a * b + c). - SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop - ///< invariant - SelectFCmp ///< Integer select(fcmp(),x,y) where one of (x,y) is loop - ///< invariant + None, ///< Not a recurrence. + Add, ///< Sum of integers. + Mul, ///< Product of integers. + Or, ///< Bitwise or logical OR of integers. + And, ///< Bitwise or logical AND of integers. + Xor, ///< Bitwise or logical XOR of integers. + SMin, ///< Signed integer min implemented in terms of select(cmp()). + SMax, ///< Signed integer max implemented in terms of select(cmp()). + UMin, ///< Unisgned integer min implemented in terms of select(cmp()). + UMax, ///< Unsigned integer max implemented in terms of select(cmp()). + FAdd, ///< Sum of floats. + FMul, ///< Product of floats. + FMin, ///< FP min implemented in terms of select(cmp()). + FMax, ///< FP max implemented in terms of select(cmp()). + FMulAdd, ///< Fused multiply-add of floats (a * b + c). 
+ SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop + ///< invariant + SelectFCmp, ///< Integer select(fcmp(),x,y) where one of (x,y) is loop + ///< invariant + SelectIVICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is increasing + ///< loop induction PHI + SelectIVFCmp, ///< Integer select(fcmp(),x,y) where one of (x,y) is increasing + ///< loop induction PHI + MinMaxFirstIdx, ///< Min/Max with first index + MinMaxLastIdx ///< Min/Max with last index }; /// The RecurrenceDescriptor is used to identify recurrences variables in a @@ -74,11 +80,13 @@ RecurKind K, FastMathFlags FMF, Instruction *ExactFP, Type *RT, bool Signed, bool Ordered, SmallPtrSetImpl &CI, - unsigned MinWidthCastToRecurTy) + unsigned MinWidthCastToRecurTy, PHINode *UserRecurPhi, + RecurKind UserRecurKind) : IntermediateStore(Store), StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF), ExactFPMathInst(ExactFP), RecurrenceType(RT), IsSigned(Signed), IsOrdered(Ordered), - MinWidthCastToRecurrenceType(MinWidthCastToRecurTy) { + MinWidthCastToRecurrenceType(MinWidthCastToRecurTy), + UserRecurPhi(UserRecurPhi), UserRecurKind(UserRecurKind) { CastInsts.insert(CI.begin(), CI.end()); } @@ -93,6 +101,12 @@ : IsRecurrence(true), PatternLastInst(I), RecKind(K), ExactFPMathInst(ExactFP) {} + InstDesc(bool IsRecur, Instruction *I, PHINode *CandUserRecurPhi, + RecurKind CandUserRecurKind, Instruction *ExactFP = nullptr) + : IsRecurrence(IsRecur), PatternLastInst(I), RecKind(RecurKind::None), + CandUserRecurPhi(CandUserRecurPhi), + CandUserRecurKind(CandUserRecurKind), ExactFPMathInst(ExactFP) {} + bool isRecurrence() const { return IsRecurrence; } bool needsExactFPMath() const { return ExactFPMathInst != nullptr; } @@ -103,6 +117,14 @@ Instruction *getPatternInst() const { return PatternLastInst; } + PHINode *getCandUserRecurPhi() const { return CandUserRecurPhi; } + + RecurKind getCandUserRecurKind() const { return CandUserRecurKind; } + + bool isCandidateUser() const { + 
return getCandUserRecurPhi() && getCandUserRecurKind() != RecurKind::None; + } + private: // Is this instruction a recurrence candidate. bool IsRecurrence; @@ -113,6 +135,11 @@ RecurKind RecKind; // Recurrence does not allow floating-point reassociation. Instruction *ExactFPMathInst; + // This instruction may be the operation of another recurrence. + // Record potential recurrence phi. + PHINode *CandUserRecurPhi = nullptr; + // And expected recurrence kind. + RecurKind CandUserRecurKind = RecurKind::None; }; /// Returns a struct describing if the instruction 'I' can be a recurrence @@ -123,7 +150,7 @@ /// the returned struct. static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind, InstDesc &Prev, - FastMathFlags FuncFMF); + FastMathFlags FuncFMF, ScalarEvolution *SE); /// Returns true if instruction I has multiple uses in Insts static bool hasMultipleUsesOf(Instruction *I, @@ -139,7 +166,18 @@ /// Kind. \p Prev specifies the description of an already processed select /// instruction, so its corresponding cmp can be matched to it. static InstDesc isMinMaxPattern(Instruction *I, RecurKind Kind, - const InstDesc &Prev); + const InstDesc &Prev, Loop *Loop, + PHINode *OrigPhi, ScalarEvolution *SE); + + /// Returns RecurKind describing which min/max recurrence kind the instruction + /// \p I belongs to. Return RecurKind::None if instruction \p I is not matched + /// any of min/max recurrence kind. Unlike isMinMaxPattern, this function does + /// not limit exactly one use of cmp value. + static RecurKind isMinMaxOperation(Instruction *I); + + static InstDesc isMinMaxIdxPattern(Loop *Loop, Instruction *I, + PHINode *MinMaxPhi, RecurKind MinMaxKind, + ScalarEvolution *SE); /// Returns a struct describing whether the instruction is either a /// Select(ICmp(A, B), X, Y), or @@ -148,7 +186,8 @@ /// value. \p Prev specifies the description of an already processed select /// instruction, so its corresponding cmp can be matched to it. 
static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev); + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE); /// Returns a struct describing if the instruction is a /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern. @@ -234,10 +273,23 @@ return isIntMinMaxRecurrenceKind(Kind) || isFPMinMaxRecurrenceKind(Kind); } + /// Returns true if the recurrence kind is a max kind. + static bool isMaxRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::UMax || Kind == RecurKind::SMax || + Kind == RecurKind::FMax; + } + + static bool isMinMaxIdxRecurrenceKind(RecurKind Kind) { + return Kind == RecurKind::MinMaxFirstIdx || + Kind == RecurKind::MinMaxLastIdx; + } + /// Returns true if the recurrence kind is of the form /// select(cmp(),x,y) where one of (x,y) is loop invariant. static bool isSelectCmpRecurrenceKind(RecurKind Kind) { - return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp; + return Kind == RecurKind::SelectICmp || Kind == RecurKind::SelectFCmp || + Kind == RecurKind::SelectIVICmp || Kind == RecurKind::SelectIVFCmp || + isMinMaxIdxRecurrenceKind(Kind); } /// Returns the type of the recurrence. This type can be narrower than the @@ -248,6 +300,29 @@ /// recurrence. 
const SmallPtrSet &getCastInsts() const { return CastInsts; } + PHINode *getUserRecurPhi() const { return UserRecurPhi; } + + void setRecurKind(RecurKind K) { + assert((K != RecurKind::None) && "Unexpected recurrence kind."); + Kind = K; + } + + void setDependMinMaxRecDes(RecurrenceDescriptor *MMRD) { + assert(isMinMaxRecurrenceKind(MMRD->getRecurrenceKind()) && + "DependMinMaxRecDes must be a min/max recurrence."); + DependMinMaxRecDes = MMRD; + } + + RecurrenceDescriptor *getDependMinMaxRecDes() const { + return DependMinMaxRecDes; + } + + bool hasUserRecurrence() const { + return UserRecurPhi && UserRecurKind != RecurKind::None; + } + + bool fixUserRecurrence(RecurrenceDescriptor &UserRedDes); + /// Returns the minimum width used by the recurrence in bits. unsigned getMinWidthCastToRecurrenceTypeInBits() const { return MinWidthCastToRecurrenceType; @@ -300,6 +375,12 @@ SmallPtrSet CastInsts; // The minimum width used by the recurrence. unsigned MinWidthCastToRecurrenceType; + + PHINode *UserRecurPhi = nullptr; + + RecurKind UserRecurKind = RecurKind::None; + + RecurrenceDescriptor *DependMinMaxRecDes = nullptr; }; /// A struct for saving information about induction variables. diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -359,7 +359,7 @@ /// to select between \p Left and \p Right. Any lane value in \p Left that /// matches 2) will be merged into \p Right. Value *createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, RecurKind RK, - Value *Left, Value *Right); + Value *Left, Value *Right, Value *SrcCmp = nullptr); /// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind. /// The Builder's fast-math-flags must be set to propagate the expected values. 
@@ -388,11 +388,25 @@ /// Create a target reduction of the given vector \p Src for a reduction of the /// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation /// is described by \p Desc. -Value *createSelectCmpTargetReduction(IRBuilderBase &B, - const TargetTransformInfo *TTI, - Value *Src, - const RecurrenceDescriptor &Desc, - PHINode *OrigPhi); +Value *createInvariantSelectCmpTargetReduction(IRBuilderBase &B, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi); + +Value *createMMISelectCmpTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi, Value *SrcMask); + +/// Create a target reduction of the given vector \p Src for a reduction of the +/// kind RecurKind::SelectICmp or RecurKind::SelectFCmp. The reduction operation +/// is described by \p Desc. +Value * +createSelectCmpTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, + Value *Src, const RecurrenceDescriptor &Desc, + PHINode *OrigPhi, Value *SrcMask = nullptr); /// Create a generic target reduction using a recurrence descriptor \p Desc /// The target is queried to determine if intrinsics or shuffle sequences are @@ -400,7 +414,8 @@ /// Fast-math-flags are propagated using the RecurrenceDescriptor. Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, - PHINode *OrigPhi = nullptr); + PHINode *OrigPhi = nullptr, + Value *SrcMask = nullptr); /// Create an ordered reduction intrinsic using the given recurrence /// descriptor \p Desc. 
@@ -408,6 +423,11 @@ const RecurrenceDescriptor &Desc, Value *Src, Value *Start); +Value *createSentinelValueHandling(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + const RecurrenceDescriptor &Desc, + Value *Rdx); + /// Get the intersection (logical and) of all of the potential IR flags /// of each scalar operation (VL) that will be converted into a vector (I). /// If OpValue is non-null, we only consider operations similar to OpValue diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp --- a/llvm/lib/Analysis/IVDescriptors.cpp +++ b/llvm/lib/Analysis/IVDescriptors.cpp @@ -54,6 +54,10 @@ case RecurKind::UMin: case RecurKind::SelectICmp: case RecurKind::SelectFCmp: + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + case RecurKind::MinMaxFirstIdx: + case RecurKind::MinMaxLastIdx: return true; } return false; @@ -63,6 +67,21 @@ return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind); } +bool RecurrenceDescriptor::fixUserRecurrence(RecurrenceDescriptor &UserRedDes) { + RecurKind UserCurrKind = UserRedDes.getRecurrenceKind(); + assert((UserCurrKind != RecurKind::None) && "Unexpected recurrence kind."); + + if (isMinMaxRecurrenceKind(Kind)) + if (UserCurrKind == RecurKind::SelectIVICmp || + UserCurrKind == RecurKind::SelectIVFCmp) { + UserRedDes.setRecurKind(UserRecurKind); + UserRedDes.setDependMinMaxRecDes(this); + return true; + } + + return false; +} + /// Determines if Phi may have been type-promoted. If Phi has a single user /// that ANDs the Phi with a type mask, return the user. RT is updated to /// account for the narrower bit width represented by the mask, and the AND @@ -247,6 +266,9 @@ // must include the original PHI. 
bool FoundStartPHI = false; + PHINode *UserRecurPHI = nullptr; + RecurKind UserRecurKind = RecurKind::None; + // To recognize min/max patterns formed by a icmp select sequence, we store // the number of instruction we saw from the recognized min/max pattern, // to make sure we only see exactly the two instructions. @@ -375,12 +397,32 @@ // type-promoted). if (Cur != Start) { ReduxDesc = - isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF); + isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE); ExactFPMathInst = ExactFPMathInst == nullptr ? ReduxDesc.getExactFPMathInst() : ExactFPMathInst; - if (!ReduxDesc.isRecurrence()) - return false; + if (!ReduxDesc.isRecurrence()) { + if (!ReduxDesc.isCandidateUser()) + return false; + + // TODO: Only allow one user recurrence now. + if (UserRecurPHI) + return false; + + UserRecurPHI = ReduxDesc.getCandUserRecurPhi(); + UserRecurKind = ReduxDesc.getCandUserRecurKind(); + // TODO: Call AddReductionVar here? + + // Fix NumCmpSelectPatternInst + if (auto *SI = dyn_cast(Cur)) { + auto *CI = dyn_cast(SI->getCondition()); + if (CI->hasOneUse()) + --NumCmpSelectPatternInst; + } + // Stop visiting the users of current instruction if it contains user + // recurrence. + continue; + } // FIXME: FMF is allowed on phi, but propagation is not handled correctly. 
if (isa(ReduxDesc.getPatternInst()) && !IsAPhi) { FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags(); @@ -419,10 +461,12 @@ if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts)) return false; - if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) && + if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp || + Kind == RecurKind::SelectIVICmp) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; - if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) && + if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp || + Kind == RecurKind::SelectIVFCmp) && (isa(Cur) || isa(Cur))) ++NumCmpSelectPatternInst; @@ -488,9 +532,10 @@ ((!isa(UI) && !isa(UI) && !isa(UI)) || (!isConditionalRdxPattern(Kind, UI).isRecurrence() && - !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal) + !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal, SE) .isRecurrence() && - !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence()))) + !isMinMaxPattern(UI, Kind, IgnoredVal, TheLoop, Phi, SE) + .isRecurrence()))) return false; // Remember that we completed the cycle. @@ -600,7 +645,8 @@ // Save the description of this reduction variable. RecurrenceDescriptor RD(RdxStart, ExitInstruction, IntermediateStore, Kind, FMF, ExactFPMathInst, RecurrenceType, IsSigned, - IsOrdered, CastInsts, MinWidthCastToRecurrenceType); + IsOrdered, CastInsts, MinWidthCastToRecurrenceType, + UserRecurPHI, UserRecurKind); RedDes = RD; return true; @@ -629,7 +675,8 @@ // value (3 in the example above). RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi, - Instruction *I, InstDesc &Prev) { + Instruction *I, InstDesc &Prev, + ScalarEvolution *SE) { // We must handle the select(cmp(),x,y) as a single instruction. Advance to // the select. 
CmpInst::Predicate Pred; @@ -653,19 +700,43 @@ else return InstDesc(false, I); + auto isIncreasingLoopInduction = [&SE, &Loop](Value *V) { + if (!SE) + return false; + + auto *Phi = dyn_cast(V); + if (!Phi) + return false; + + auto LB = Loop::LoopBounds::getBounds(*Loop, *Phi, *SE); + if (!LB) + return false; + + auto Direction = LB->getDirection(); + return Direction == Loop::LoopBounds::Direction::Increasing; + }; + // We are looking for selects of the form: // select(cmp(), phi, loop_invariant) or // select(cmp(), loop_invariant, phi) - if (!Loop->isLoopInvariant(NonPhi)) - return InstDesc(false, I); + if (Loop->isLoopInvariant(NonPhi)) + return InstDesc(I, isa(I->getOperand(0)) ? RecurKind::SelectICmp + : RecurKind::SelectFCmp); + // or + // select(cmp(), phi, loop_induction) or + // select(cmp(), loop_induction, phi) + if (isIncreasingLoopInduction(NonPhi)) + return InstDesc(I, isa(I->getOperand(0)) + ? RecurKind::SelectIVICmp + : RecurKind::SelectIVFCmp); - return InstDesc(I, isa(I->getOperand(0)) ? RecurKind::SelectICmp - : RecurKind::SelectFCmp); + return InstDesc(false, I); } RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind, - const InstDesc &Prev) { + const InstDesc &Prev, Loop *Loop, + PHINode *OrigPhi, ScalarEvolution *SE) { assert((isa(I) || isa(I) || isa(I)) && "Expected a cmp or select or call instruction"); if (!isMinMaxRecurrenceKind(Kind)) @@ -685,29 +756,135 @@ m_Value()))) return InstDesc(false, I); + RecurKind MMRK = isMinMaxOperation(I); + if (MMRK != RecurKind::None) + return InstDesc(Kind == MMRK, I); + + if (isa(I)) + return isMinMaxIdxPattern(Loop, I, OrigPhi, Kind, SE); + + return InstDesc(false, I); +} + +RecurKind RecurrenceDescriptor::isMinMaxOperation(Instruction *I) { // Look for a min/max pattern. 
if (match(I, m_UMin(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::UMin, I); + return RecurKind::UMin; if (match(I, m_UMax(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::UMax, I); + return RecurKind::UMax; if (match(I, m_SMax(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::SMax, I); + return RecurKind::SMax; if (match(I, m_SMin(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::SMin, I); + return RecurKind::SMin; if (match(I, m_OrdFMin(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMin, I); + return RecurKind::FMin; if (match(I, m_OrdFMax(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMax, I); + return RecurKind::FMax; if (match(I, m_UnordFMin(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMin, I); + return RecurKind::FMin; if (match(I, m_UnordFMax(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMax, I); + return RecurKind::FMax; if (match(I, m_Intrinsic(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMin, I); + return RecurKind::FMin; if (match(I, m_Intrinsic(m_Value(), m_Value()))) - return InstDesc(Kind == RecurKind::FMax, I); + return RecurKind::FMax; - return InstDesc(false, I); + return RecurKind::None; +} + +RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isMinMaxIdxPattern( + Loop *Loop, Instruction *I, PHINode *MinMaxPhi, RecurKind MinMaxKind, + ScalarEvolution *SE) { + assert(isa(I) && "Expected a select instruction"); + // TODO: FP MinMax + if (!isIntMinMaxRecurrenceKind(MinMaxKind)) + return InstDesc(false, I); + + // Requires SCEV to check the index part + if (!SE) { + LLVM_DEBUG(dbgs() << "MinMaxIdx patterns are not recognized without " + << "Scalar Evolution Analysis\n"); + return InstDesc(false, I); + } + + // Check the index select + auto *SI = dyn_cast(I); + auto *CI = dyn_cast(SI->getCondition()); + Value *LHS = CI->getOperand(0), *RHS = CI->getOperand(1); + + // %cmp = icmp pred, %mmphi, %0 + // %select = select %cmp, 
%update, %idxphi + // Check if cmp used min/max phi + bool IsLHSPhi; + if (MinMaxPhi == dyn_cast(LHS)) + IsLHSPhi = true; + else if (MinMaxPhi == dyn_cast(RHS)) + IsLHSPhi = false; + else + return InstDesc(false, I); + + // Normalize the predicate, and get which side the select should update idx + // TODO: Need to consider commutable. + CmpInst::Predicate NormPred = + IsLHSPhi ? CI->getPredicate() : CI->getInversePredicate(); + bool UpdateSide; + RecurKind ExpectedIdxRK; + switch (NormPred) { + case CmpInst::ICMP_SLT: + case CmpInst::ICMP_ULT: + // %mmphi < %0 + UpdateSide = isMaxRecurrenceKind(MinMaxKind); + ExpectedIdxRK = isMaxRecurrenceKind(MinMaxKind) ? RecurKind::MinMaxFirstIdx + : RecurKind::MinMaxLastIdx; + break; + case CmpInst::ICMP_SLE: + case CmpInst::ICMP_ULE: + // %mmphi <= %0 + UpdateSide = isMaxRecurrenceKind(MinMaxKind); + ExpectedIdxRK = isMaxRecurrenceKind(MinMaxKind) ? RecurKind::MinMaxLastIdx + : RecurKind::MinMaxFirstIdx; + break; + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_UGT: + // %mmphi > %0 + UpdateSide = !isMaxRecurrenceKind(MinMaxKind); + ExpectedIdxRK = isMaxRecurrenceKind(MinMaxKind) ? RecurKind::MinMaxLastIdx + : RecurKind::MinMaxFirstIdx; + break; + case CmpInst::ICMP_SGE: + case CmpInst::ICMP_UGE: + // %mmphi >= %0 + UpdateSide = !isMaxRecurrenceKind(MinMaxKind); + ExpectedIdxRK = isMaxRecurrenceKind(MinMaxKind) ? RecurKind::MinMaxFirstIdx + : RecurKind::MinMaxLastIdx; + break; + default: + return InstDesc(false, I); + } + + // Get the reduction phi of index select + Value *IdxUpdateV = UpdateSide ? SI->getTrueValue() : SI->getFalseValue(); + Value *IdxReduxV = UpdateSide ? SI->getFalseValue() : SI->getTrueValue(); + // Handle the operand of index select may have been casted. 
+ if (auto *Cast = dyn_cast(IdxUpdateV)) + IdxUpdateV = Cast->getOperand(0); + + auto *IdxUpdatePhi = dyn_cast(IdxUpdateV); + auto *IdxReduxPhi = dyn_cast(IdxReduxV); + if (!IdxUpdatePhi || !IdxReduxPhi) + return InstDesc(false, I); + + // Check that the update side is a loop induction variable + InductionDescriptor ID; + if (!InductionDescriptor::isInductionPHI(IdxUpdatePhi, Loop, SE, ID)) + return InstDesc(false, I); + + // The reduction phi of index select and reduction phi of min/max must not + // be the same + if (IdxReduxPhi == MinMaxPhi) + return InstDesc(false, I); + + return InstDesc(false, I, IdxReduxPhi, ExpectedIdxRK); } /// Returns true if the select instruction has users in the compare-and-add @@ -762,10 +939,9 @@ return InstDesc(true, SI); } -RecurrenceDescriptor::InstDesc -RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi, - Instruction *I, RecurKind Kind, - InstDesc &Prev, FastMathFlags FuncFMF) { +RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr( + Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev, + FastMathFlags FuncFMF, ScalarEvolution *SE) { assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind); switch (I->getOpcode()) { default: @@ -800,13 +976,13 @@ case Instruction::ICmp: case Instruction::Call: if (isSelectCmpRecurrenceKind(Kind)) - return isSelectCmpPattern(L, OrigPhi, I, Prev); + return isSelectCmpPattern(L, OrigPhi, I, Prev, SE); if (isIntMinMaxRecurrenceKind(Kind) || (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) || (isa(I) && I->hasNoNaNs() && I->hasNoSignedZeros())) && isFPMinMaxRecurrenceKind(Kind))) - return isMinMaxPattern(I, Kind, Prev); + return isMinMaxPattern(I, Kind, Prev, L, OrigPhi, SE); else if (isFMulAddIntrinsic(I)) return InstDesc(Kind == RecurKind::FMulAdd, I, I->hasAllowReassoc() ? 
nullptr : I); @@ -1128,6 +1304,12 @@ case RecurKind::SelectFCmp: return getRecurrenceStartValue(); break; + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + case RecurKind::MinMaxFirstIdx: + case RecurKind::MinMaxLastIdx: + // FIXME: SMax or UMax, I'm not sure which one is correct. + return getRecurrenceIdentity(RecurKind::SMax, Tp, FMF); default: llvm_unreachable("Unknown recurrence kind"); } @@ -1155,10 +1337,15 @@ case RecurKind::UMax: case RecurKind::UMin: case RecurKind::SelectICmp: + case RecurKind::SelectIVICmp: + // TODO: maybe new FMinMaxFirstIdx/ FMinMaxLastIdx + case RecurKind::MinMaxFirstIdx: + case RecurKind::MinMaxLastIdx: return Instruction::ICmp; case RecurKind::FMax: case RecurKind::FMin: case RecurKind::SelectFCmp: + case RecurKind::SelectIVFCmp: return Instruction::FCmp; default: llvm_unreachable("Unknown recurrence operation"); diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -900,12 +900,37 @@ } Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal, - RecurKind RK, Value *Left, Value *Right) { - if (auto VTy = dyn_cast(Left->getType())) - StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); - Value *Cmp = - Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); - return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); + RecurKind RK, Value *Left, Value *Right, + Value *SrcCmp) { + switch (RK) { + case RecurKind::SelectICmp: + case RecurKind::SelectFCmp: { + if (auto VTy = dyn_cast(Left->getType())) + StartVal = Builder.CreateVectorSplat(VTy->getElementCount(), StartVal); + Value *Cmp = + Builder.CreateCmp(CmpInst::ICMP_NE, Left, StartVal, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); + } + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + // TODO: SMax or UMax? 
+ return createMinMaxOp(Builder, RecurKind::SMax, Left, Right); + case RecurKind::MinMaxFirstIdx: { + assert((SrcCmp && isa(SrcCmp)) && + "SrcCmp should not be nullptr when MinMaxFirstIdx recurrence"); + auto *SrcCI = dyn_cast(SrcCmp); + CmpInst::Predicate Pred = SrcCI->getNonStrictPredicate(); + Value *Cmp = Builder.CreateCmp(Pred, SrcCI->getOperand(0), + SrcCI->getOperand(1), "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Left, Right, "rdx.select"); + } + case RecurKind::MinMaxLastIdx: + assert((SrcCmp && isa(SrcCmp)) && + "SrcCmp should not be nullptr when MinMaxLastIdx recurrence"); + return Builder.CreateSelect(SrcCmp, Left, Right, "rdx.select"); + default: + llvm_unreachable("Unknown SelectCmp recurrence kind"); + } } Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left, @@ -982,13 +1007,11 @@ return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } -Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, - const TargetTransformInfo *TTI, - Value *Src, - const RecurrenceDescriptor &Desc, - PHINode *OrigPhi) { - assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( - Desc.getRecurrenceKind()) && +Value *llvm::createInvariantSelectCmpTargetReduction( + IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, + const RecurrenceDescriptor &Desc, PHINode *OrigPhi) { + assert((Desc.getRecurrenceKind() == RecurKind::SelectICmp || + Desc.getRecurrenceKind() == RecurKind::SelectFCmp) && "Unexpected reduction kind"); Value *InitVal = Desc.getRecurrenceStartValue(); Value *NewVal = nullptr; @@ -1022,6 +1045,58 @@ return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select"); } +Value *llvm::createMMISelectCmpTargetReduction( + IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, + const RecurrenceDescriptor &Desc, PHINode *OrigPhi, Value *SrcMask) { + assert(RecurrenceDescriptor::isMinMaxIdxRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + RecurKind Kind = 
Desc.getRecurrenceKind(); + // FIXME: UMax/SMax or UMin/UMax? + RecurKind RdxExtractK = + Kind == RecurKind::MinMaxFirstIdx ? RecurKind::SMin : RecurKind::SMax; + + assert(SrcMask && "MinMaxIdx recurrence requests mask"); + // TODO: If vp reduction intrinsic is supported, there is no need to generate + // additional select here. + auto *SrcVecEltTy = cast(Src->getType())->getElementType(); + Value *RdxOpIden = Desc.getRecurrenceIdentity(RdxExtractK, SrcVecEltTy, + Desc.getFastMathFlags()); + ElementCount EC = cast(Src->getType())->getElementCount(); + RdxOpIden = Builder.CreateVectorSplat(EC, RdxOpIden); + Value *NewVal = Builder.CreateSelect(SrcMask, Src, RdxOpIden, "mask.select"); + + return createSimpleTargetReduction(Builder, TTI, NewVal, RdxExtractK); +} + +Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + Value *Src, + const RecurrenceDescriptor &Desc, + PHINode *OrigPhi, Value *SrcMask) { + assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind( + Desc.getRecurrenceKind()) && + "Unexpected reduction kind"); + RecurKind RdxKind = Desc.getRecurrenceKind(); + switch (RdxKind) { + case RecurKind::SelectICmp: + case RecurKind::SelectFCmp: + return createInvariantSelectCmpTargetReduction(Builder, TTI, Src, Desc, + OrigPhi); + case RecurKind::SelectIVICmp: + case RecurKind::SelectIVFCmp: + // FIXME: SMax or UMax? 
+ // TODO: Decreasing induction need fix here + return Builder.CreateIntMaxReduce(Src, true); + case RecurKind::MinMaxFirstIdx: + case RecurKind::MinMaxLastIdx: + return createMMISelectCmpTargetReduction(Builder, TTI, Src, Desc, OrigPhi, + SrcMask); + default: + llvm_unreachable("Unknown SelectCmp recurrence kind"); + } +} + Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src, RecurKind RdxKind) { @@ -1063,7 +1138,7 @@ Value *llvm::createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI, const RecurrenceDescriptor &Desc, Value *Src, - PHINode *OrigPhi) { + PHINode *OrigPhi, Value *SrcMask) { // TODO: Support in-order reductions based on the recurrence descriptor. // All ops in the reduction inherit fast-math-flags from the recurrence // descriptor. @@ -1072,7 +1147,7 @@ RecurKind RK = Desc.getRecurrenceKind(); if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) - return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi); + return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi, SrcMask); return createSimpleTargetReduction(B, TTI, Src, RK); } @@ -1089,6 +1164,17 @@ return B.CreateFAddReduce(Start, Src); } +Value *llvm::createSentinelValueHandling(IRBuilderBase &Builder, + const TargetTransformInfo *TTI, + const RecurrenceDescriptor &Desc, + Value *Rdx) { + Value *InitVal = Desc.getRecurrenceStartValue(); + Value *Iden = Desc.getRecurrenceIdentity( + Desc.getRecurrenceKind(), Rdx->getType(), Desc.getFastMathFlags()); + Value *Cmp = Builder.CreateCmp(CmpInst::ICMP_NE, Rdx, Iden, "rdx.select.cmp"); + return Builder.CreateSelect(Cmp, Rdx, InitVal, "rdx.select"); +} + void llvm::propagateIRFlags(Value *I, ArrayRef VL, Value *OpValue, bool IncludeWrapFlags) { auto *VecOp = dyn_cast(I); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- 
a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -878,6 +878,21 @@ } // next instr. } + // Second, confirm the incomplete reductions + for (auto R : Reductions) { + RecurrenceDescriptor &RedDes = Reductions.find(R.first)->second; + if (!RedDes.hasUserRecurrence()) + continue; + + PHINode *UserPhi = RedDes.getUserRecurPhi(); + if (!isReductionVariable(UserPhi)) + return false; + + RecurrenceDescriptor &UserRedDes = Reductions.find(UserPhi)->second; + if (!RedDes.fixUserRecurrence(UserRedDes)) + return false; + } + if (!PrimaryInduction) { if (Inductions.empty()) { reportVectorizationFailure("Did not find one integer induction var", diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -535,6 +535,11 @@ // generated by fixReduction. PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc); + // Returns the recurrence mask (mask.cmp) for a recurrence as generated by + // fixReduction. + std::pair + getDependRecurrenceMask(const RecurrenceDescriptor &RdxDesc); + /// Create a new phi node for the induction variable \p OrigPhi to resume /// iteration count in the scalar epilogue, from where the vectorized loop /// left off. In cases where the loop skeleton is more complicated (eg. @@ -744,6 +749,12 @@ // correct start value of reduction PHIs when vectorizing the epilogue. SmallMapVector ReductionResumeValues; + + // Holds the masks for recurrences in the loops, to be used for reduction when + // there is a reduction that depends on the recurrence. 
+ SmallMapVector, + 4> + DependRecurrenceMasks; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -1120,6 +1131,15 @@ return It->second; } +std::pair +InnerLoopVectorizer::getDependRecurrenceMask( + const RecurrenceDescriptor &RdxDesc) { + auto It = DependRecurrenceMasks.find(&RdxDesc); + assert(It != DependRecurrenceMasks.end() && + "Expected to find a dependence mask for the recurrence."); + return It->second; +} + namespace llvm { // Loop vectorization cost-model hints how the scalar epilogue loop should be @@ -3762,10 +3782,24 @@ // the incoming edges. VPBasicBlock *Header = State.Plan->getVectorLoopRegion()->getEntryBasicBlock(); - for (VPRecipeBase &R : Header->phis()) { - if (auto *ReductionPhi = dyn_cast(&R)) + // FIXME: Maybe I should not choose std::queue... + std::queue Worklist; + for (VPRecipeBase &R : Header->phis()) + Worklist.push(&R); + + while (!Worklist.empty()) { + VPRecipeBase &R = *(Worklist.front()); + Worklist.pop(); + if (auto *ReductionPhi = dyn_cast(&R)) { + const RecurrenceDescriptor &RecDesc = + ReductionPhi->getRecurrenceDescriptor(); + RecurrenceDescriptor *DependRecDesc = RecDesc.getDependMinMaxRecDes(); + if (DependRecDesc && !DependRecurrenceMasks.count(DependRecDesc)) { + Worklist.push(&R); + continue; + } fixReduction(ReductionPhi, State); - else if (auto *FOR = dyn_cast(&R)) + } else if (auto *FOR = dyn_cast(&R)) fixFixedOrderRecurrence(FOR, State); } } @@ -3983,6 +4017,19 @@ Value *ReducedPartRdx = State.get(LoopExitInstDef, 0); unsigned Op = RecurrenceDescriptor::getOpcode(RK); + // Get the reduction mask if the reduction depend on another one. 
+ RecurrenceDescriptor *DependDesc = RdxDesc.getDependMinMaxRecDes(); + Value *DependRdxMask = nullptr; + VectorParts DependPartMasks; + if (DependDesc) { + Builder.SetInsertPoint(&*LoopMiddleBlock->getTerminator()); + std::tie(DependRdxMask, DependPartMasks) = + getDependRecurrenceMask(*DependDesc); + } + + Value *NewRdxMask = nullptr; + VectorParts NewPartMasks(UF); + // The middle block terminator has already been assigned a DebugLoc here (the // OrigLoop's single latch terminator). We want the whole middle block to // appear to execute on this line because: (a) it is all compiler generated, @@ -3999,30 +4046,61 @@ Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); for (unsigned Part = 1; Part < UF; ++Part) { Value *RdxPart = State.get(LoopExitInstDef, Part); + Value *PartMask = DependDesc ? DependPartMasks[Part] : nullptr; if (Op != Instruction::ICmp && Op != Instruction::FCmp) { ReducedPartRdx = Builder.CreateBinOp( (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx"); } else if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) ReducedPartRdx = createSelectCmpOp(Builder, ReductionStartValue, RK, - ReducedPartRdx, RdxPart); - else + ReducedPartRdx, RdxPart, PartMask); + else { ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart); + // Keep the part mask on demand. + if (RdxDesc.hasUserRecurrence()) { + auto *SI = dyn_cast(ReducedPartRdx); + auto *CI = dyn_cast(SI->getCondition()); + NewPartMasks[Part] = CI; + } + } } } // Create the reduction after the loop. Note that inloop reductions create the // target reduction in the loop using a Reduction recipe. 
if (VF.isVector() && !PhiR->isInLoop()) { - ReducedPartRdx = - createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi); + Value *ReducedPart = ReducedPartRdx; + ReducedPartRdx = createTargetReduction( + Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi, DependRdxMask); // If the reduction can be performed in a smaller type, we need to extend // the reduction to the wider type before we branch to the original loop. if (PhiTy != RdxDesc.getRecurrenceType()) ReducedPartRdx = RdxDesc.isSigned() ? Builder.CreateSExt(ReducedPartRdx, PhiTy) : Builder.CreateZExt(ReducedPartRdx, PhiTy); + + // Create depend recurrence mask on demand. + if (RdxDesc.hasUserRecurrence()) { + ElementCount EC = + cast(ReducedPart->getType())->getElementCount(); + Value *RdxSplat = Builder.CreateVectorSplat(EC, ReducedPartRdx); + // FIXME: Not sure use FCMP_OEQ is right or not. + CmpInst::Predicate MaskPred = + (ReducedPartRdx->getType()->isFloatingPointTy()) ? CmpInst::FCMP_OEQ + : CmpInst::ICMP_EQ; + NewRdxMask = + Builder.CreateCmp(MaskPred, RdxSplat, ReducedPart, "mask.cmp"); + } } + if (RecurrenceDescriptor::isMinMaxIdxRecurrenceKind(RK) || + (RK == RecurKind::SelectIVICmp) || (RK == RecurKind::SelectIVFCmp)) + ReducedPartRdx = + createSentinelValueHandling(Builder, TTI, RdxDesc, ReducedPartRdx); + + // Set the recurrence mask for this reduction on demand. 
+ if (RdxDesc.hasUserRecurrence()) + DependRecurrenceMasks.insert({&RdxDesc, {NewRdxMask, NewPartMasks}}); + PHINode *ResumePhi = dyn_cast(PhiR->getStartValue()->getUnderlyingValue()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -1214,7 +1214,7 @@ Value *Iden = nullptr; RecurKind RK = RdxDesc.getRecurrenceKind(); if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) || - RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) { + (RK == RecurKind::SelectICmp || RK == RecurKind::SelectFCmp)) { // MinMax reduction have the start value as their identify. if (ScalarPHI) { Iden = StartV; @@ -1224,6 +1224,16 @@ StartV = Iden = Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident"); } + } else if (RecurrenceDescriptor::isMinMaxIdxRecurrenceKind(RK) || + (RK == RecurKind::SelectIVICmp || RK == RecurKind::SelectIVFCmp)) { + StartV = Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), + RdxDesc.getFastMathFlags()); + + if (!ScalarPHI) { + IRBuilderBase::InsertPointGuard IPBuilder(Builder); + Builder.SetInsertPoint(VectorPH->getTerminator()); + StartV = Iden = Builder.CreateVectorSplat(State.VF, Iden); + } } else { Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(), RdxDesc.getFastMathFlags()); diff --git a/llvm/test/Transforms/LoopVectorize/select-min-index.ll b/llvm/test/Transforms/LoopVectorize/select-min-index.ll --- a/llvm/test/Transforms/LoopVectorize/select-min-index.ll +++ b/llvm/test/Transforms/LoopVectorize/select-min-index.ll @@ -1,6 +1,7 @@ -; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s -; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-VF4IC2 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s --check-prefix=CHECK-VF1IC2 --check-prefix=CHECK ; Test cases for selecting the index with the minimum value. @@ -30,8 +31,187 @@ } define i64 @test_vectorize_select_umin_idx_all_exit_inst(ptr %src, ptr %umin) { -; CHECK-LABEL: @test_vectorize_select_umin_idx_all_exit_inst( -; CHECK-NOT: vector.body: +; CHECK-VF4IC1-LABEL: @test_vectorize_select_umin_idx_all_exit_inst( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP4]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> 
[[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[TMP4]]) +; CHECK-VF4IC1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-VF4IC1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[TMP4]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC1-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[TMP5]], <4 x i64> +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 0 +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC1: loop: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 
[[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-VF4IC1-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-VF4IC1-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[RES_UMIN:%.*]] = phi i64 [ [[MIN_VAL_NEXT]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: store i64 [[RES_UMIN]], ptr [[UMIN:%.*]], align 4 +; CHECK-VF4IC1-NEXT: ret i64 [[RES]] +; +; CHECK-VF4IC2-LABEL: @test_vectorize_select_umin_idx_all_exit_inst( +; CHECK-VF4IC2-NEXT: entry: +; CHECK-VF4IC2-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC2: vector.ph: +; CHECK-VF4IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC2: vector.body: +; CHECK-VF4IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i64> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF4IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF4IC2-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP2]], i32 4 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i64>, ptr [[TMP5]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP6:%.*]] = icmp ugt <4 x i64> [[VEC_PHI3]], [[WIDE_LOAD]] +; CHECK-VF4IC2-NEXT: [[TMP7:%.*]] = icmp ugt <4 x i64> [[VEC_PHI4]], [[WIDE_LOAD5]] +; CHECK-VF4IC2-NEXT: [[TMP8]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI3]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC2-NEXT: [[TMP9]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[VEC_PHI4]], <4 x i64> [[WIDE_LOAD5]]) +; CHECK-VF4IC2-NEXT: [[TMP10]] = select <4 x i1> [[TMP6]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC2-NEXT: [[TMP11]] = select <4 x i1> [[TMP7]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI2]] +; CHECK-VF4IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-VF4IC2-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC2: middle.block: +; CHECK-VF4IC2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <4 x i64> [[TMP8]], [[TMP9]] +; CHECK-VF4IC2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> 
[[TMP8]], <4 x i64> [[TMP9]] +; CHECK-VF4IC2-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> [[RDX_MINMAX_SELECT]]) +; CHECK-VF4IC2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP13]], i64 0 +; CHECK-VF4IC2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC2-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[RDX_MINMAX_SELECT]] +; CHECK-VF4IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ule <4 x i64> [[TMP8]], [[TMP9]] +; CHECK-VF4IC2-NEXT: [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP10]], <4 x i64> [[TMP11]] +; CHECK-VF4IC2-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[RDX_SELECT]], <4 x i64> +; CHECK-VF4IC2-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC2-NEXT: [[RDX_SELECT_CMP6:%.*]] = icmp ne i64 [[TMP14]], -9223372036854775808 +; CHECK-VF4IC2-NEXT: [[RDX_SELECT7:%.*]] = select i1 [[RDX_SELECT_CMP6]], i64 [[TMP14]], i64 0 +; CHECK-VF4IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC2: scalar.ph: +; CHECK-VF4IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: [[BC_MERGE_RDX8:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC2: loop: +; CHECK-VF4IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX8]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[GEP:%.*]] 
= getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-VF4IC2-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-VF4IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC2: exit: +; CHECK-VF4IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: [[RES_UMIN:%.*]] = phi i64 [ [[MIN_VAL_NEXT]], [[LOOP]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: store i64 [[RES_UMIN]], ptr [[UMIN:%.*]], align 4 +; CHECK-VF4IC2-NEXT: ret i64 [[RES]] +; +; CHECK-VF1IC2-LABEL: @test_vectorize_select_umin_idx_all_exit_inst( +; CHECK-VF1IC2-NEXT: entry: +; CHECK-VF1IC2-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC2: vector.ph: +; CHECK-VF1IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC2: vector.body: +; CHECK-VF1IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, 
ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = icmp ugt i64 [[VEC_PHI2]], [[TMP4]] +; CHECK-VF1IC2-NEXT: [[TMP7:%.*]] = icmp ugt i64 [[VEC_PHI3]], [[TMP5]] +; CHECK-VF1IC2-NEXT: [[TMP8]] = tail call i64 @llvm.umin.i64(i64 [[VEC_PHI2]], i64 [[TMP4]]) +; CHECK-VF1IC2-NEXT: [[TMP9]] = tail call i64 @llvm.umin.i64(i64 [[VEC_PHI3]], i64 [[TMP5]]) +; CHECK-VF1IC2-NEXT: [[TMP10]] = select i1 [[TMP6]], i64 [[TMP0]], i64 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[TMP11]] = select i1 [[TMP7]], i64 [[TMP1]], i64 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF1IC2: middle.block: +; CHECK-VF1IC2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult i64 [[TMP8]], [[TMP9]] +; CHECK-VF1IC2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP8]], i64 [[TMP9]] +; CHECK-VF1IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ule i64 [[TMP8]], [[TMP9]] +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP10]], i64 [[TMP11]] +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP4:%.*]] = icmp ne i64 [[RDX_SELECT]], -9223372036854775808 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT5:%.*]] = select i1 [[RDX_SELECT_CMP4]], i64 [[RDX_SELECT]], i64 0 +; CHECK-VF1IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC2: scalar.ph: +; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: 
[[BC_MERGE_RDX6:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: br label [[LOOP:%.*]] +; CHECK-VF1IC2: loop: +; CHECK-VF1IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX6]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[MIN_VAL:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF1IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF1IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[MIN_VAL]], [[L]] +; CHECK-VF1IC2-NEXT: [[MIN_VAL_NEXT]] = tail call i64 @llvm.umin.i64(i64 [[MIN_VAL]], i64 [[L]]) +; CHECK-VF1IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF1IC2: exit: +; CHECK-VF1IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT5]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: [[RES_UMIN:%.*]] = phi i64 [ [[MIN_VAL_NEXT]], [[LOOP]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: store i64 [[RES_UMIN]], ptr [[UMIN:%.*]], align 4 +; CHECK-VF1IC2-NEXT: ret i64 [[RES]] ; entry: br label %loop @@ -82,8 +262,178 @@ } define i64 @test_not_vectorize_select_no_min_reduction(ptr %src) { -; CHECK-LABEL: @test_not_vectorize_select_no_min_reduction( -; CHECK-NOT: vector.body: +; CHECK-VF4IC1-LABEL: @test_not_vectorize_select_no_min_reduction( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: 
vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = add <4 x i64> [[WIDE_LOAD]], +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP3]], <4 x i32> +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i64> [[TMP4]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP4]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP7]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP7]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP9]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP9]], i64 0 +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC1-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> 
[[TMP3]], i32 3 +; CHECK-VF4IC1-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP3]], i32 2 +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC1: loop: +; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC1-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC1-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF4IC1-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF4IC1-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF4IC1-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC1-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: ret i64 [[RES]] +; +; CHECK-VF4IC2-LABEL: @test_not_vectorize_select_no_min_reduction( +; CHECK-VF4IC2-NEXT: entry: +; CHECK-VF4IC2-NEXT: br i1 true, label 
[[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC2: vector.ph: +; CHECK-VF4IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC2: vector.body: +; CHECK-VF4IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC2-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF4IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF4IC2-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP2]], i32 4 +; CHECK-VF4IC2-NEXT: [[WIDE_LOAD3:%.*]] = load <4 x i64>, ptr [[TMP5]], align 4 +; CHECK-VF4IC2-NEXT: [[TMP6:%.*]] = add <4 x i64> [[WIDE_LOAD]], +; CHECK-VF4IC2-NEXT: [[TMP7]] = add <4 x i64> [[WIDE_LOAD3]], +; CHECK-VF4IC2-NEXT: [[TMP8:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP6]], <4 x i32> +; CHECK-VF4IC2-NEXT: [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP7]], <4 x i32> +; CHECK-VF4IC2-NEXT: [[TMP10:%.*]] = icmp ugt <4 x i64> [[TMP8]], [[WIDE_LOAD]] +; CHECK-VF4IC2-NEXT: [[TMP11:%.*]] = icmp ugt <4 x i64> [[TMP9]], [[WIDE_LOAD3]] +; CHECK-VF4IC2-NEXT: [[TMP12:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]]) +; 
CHECK-VF4IC2-NEXT: [[TMP13:%.*]] = call <4 x i64> @llvm.umin.v4i64(<4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD3]]) +; CHECK-VF4IC2-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC2-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI2]] +; CHECK-VF4IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-VF4IC2-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC2-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC2: middle.block: +; CHECK-VF4IC2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i64> [[TMP14]], [[TMP15]] +; CHECK-VF4IC2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP14]], <4 x i64> [[TMP15]] +; CHECK-VF4IC2-NEXT: [[TMP17:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX_SELECT]]) +; CHECK-VF4IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP17]], -9223372036854775808 +; CHECK-VF4IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP17]], i64 0 +; CHECK-VF4IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF4IC2-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3 +; CHECK-VF4IC2-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2 +; CHECK-VF4IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC2: scalar.ph: +; CHECK-VF4IC2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF4IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: br label [[LOOP:%.*]] +; CHECK-VF4IC2: loop: +; CHECK-VF4IC2-NEXT: [[IV:%.*]] = phi i64 [ 
[[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF4IC2-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF4IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF4IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF4IC2-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF4IC2-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF4IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF4IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF4IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF4IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC2: exit: +; CHECK-VF4IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC2-NEXT: ret i64 [[RES]] +; +; CHECK-VF1IC2-LABEL: @test_not_vectorize_select_no_min_reduction( +; CHECK-VF1IC2-NEXT: entry: +; CHECK-VF1IC2-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC2: vector.ph: +; CHECK-VF1IC2-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC2: vector.body: +; CHECK-VF1IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC2-NEXT: [[TMP0:%.*]] = add i64 
[[INDEX]], 0 +; CHECK-VF1IC2-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC2-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]] +; CHECK-VF1IC2-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[TMP1]] +; CHECK-VF1IC2-NEXT: [[TMP4:%.*]] = load i64, ptr [[TMP2]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP3]], align 4 +; CHECK-VF1IC2-NEXT: [[TMP6:%.*]] = add i64 [[TMP4]], 1 +; CHECK-VF1IC2-NEXT: [[TMP7]] = add i64 [[TMP5]], 1 +; CHECK-VF1IC2-NEXT: [[TMP8:%.*]] = icmp ugt i64 [[VECTOR_RECUR]], [[TMP4]] +; CHECK-VF1IC2-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[TMP6]], [[TMP5]] +; CHECK-VF1IC2-NEXT: [[TMP10]] = select i1 [[TMP8]], i64 [[TMP0]], i64 [[VEC_PHI]] +; CHECK-VF1IC2-NEXT: [[TMP11]] = select i1 [[TMP9]], i64 [[TMP1]], i64 [[VEC_PHI1]] +; CHECK-VF1IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-VF1IC2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF1IC2: middle.block: +; CHECK-VF1IC2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +; CHECK-VF1IC2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP10]], i64 [[TMP11]] +; CHECK-VF1IC2-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_MINMAX_SELECT]], -9223372036854775808 +; CHECK-VF1IC2-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_MINMAX_SELECT]], i64 0 +; CHECK-VF1IC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0 +; CHECK-VF1IC2-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC2: scalar.ph: +; CHECK-VF1IC2-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-VF1IC2-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: br label 
[[LOOP:%.*]] +; CHECK-VF1IC2: loop: +; CHECK-VF1IC2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ] +; CHECK-VF1IC2-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]] +; CHECK-VF1IC2-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4 +; CHECK-VF1IC2-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]] +; CHECK-VF1IC2-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1 +; CHECK-VF1IC2-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]]) +; CHECK-VF1IC2-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]] +; CHECK-VF1IC2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-VF1IC2-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0 +; CHECK-VF1IC2-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF1IC2: exit: +; CHECK-VF1IC2-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC2-NEXT: ret i64 [[RES]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/smax-idx.ll b/llvm/test/Transforms/LoopVectorize/smax-idx.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/smax-idx.ll @@ -0,0 +1,1206 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize,iv-descriptors -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VF4IC1 --check-prefix=CHECK +; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=4 -debug-only=loop-vectorize,iv-descriptors -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK +; RUN: 
opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -debug-only=loop-vectorize,iv-descriptors -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK + +define i64 @smax_idx(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-VF4IC1-LABEL: @smax_idx( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> 
[[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI1]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-VF4IC1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[TMP3]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[TMP5]], <4 x i64> +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II:%.*]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC1: for.body: +; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], 
[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP10]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP9]]) +; CHECK-VF4IC1-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP9]] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC1-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF4IC4-LABEL: @smax_idx( +; CHECK-VF4IC4-NEXT: entry: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC4: vector.ph: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> 
zeroinitializer +; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC4: vector.body: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI8:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], 
i64 [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP9]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP10]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP11]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP12]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC4-NEXT: [[TMP13]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI4]], <4 x i64> [[WIDE_LOAD11]]) +; CHECK-VF4IC4-NEXT: [[TMP14]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI5]], <4 x i64> [[WIDE_LOAD12]]) +; CHECK-VF4IC4-NEXT: [[TMP15]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI6]], <4 x i64> [[WIDE_LOAD13]]) +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp slt <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp slt <4 x i64> [[VEC_PHI4]], [[WIDE_LOAD11]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp slt <4 x i64> [[VEC_PHI5]], [[WIDE_LOAD12]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp slt <4 x i64> [[VEC_PHI6]], [[WIDE_LOAD13]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP16]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI7]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP17]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI8]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = select <4 x i1> [[TMP18]], <4 x i64> [[STEP_ADD1]], <4 x i64> [[VEC_PHI9]] +; CHECK-VF4IC4-NEXT: [[TMP23]] = select <4 x i1> [[TMP19]], <4 x i64> [[STEP_ADD2]], <4 x 
i64> [[VEC_PHI10]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF4IC4: middle.block: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP12]], <4 x i64> [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP14:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT15:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP14]], <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP16:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT17:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP16]], <4 x i64> [[RDX_MINMAX_SELECT15]], <4 x i64> [[TMP15]] +; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX_SELECT17]]) +; CHECK-VF4IC4-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP25]], i64 0 +; CHECK-VF4IC4-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[RDX_MINMAX_SELECT17]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP20]], <4 x i64> [[TMP21]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP18:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT19:%.*]] = select <4 x i1> [[RDX_SELECT_CMP18]], <4 x i64> 
[[RDX_SELECT]], <4 x i64> [[TMP22]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP20:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT21:%.*]] = select <4 x i1> [[RDX_SELECT_CMP20]], <4 x i64> [[RDX_SELECT19]], <4 x i64> [[TMP23]] +; CHECK-VF4IC4-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[RDX_SELECT21]], <4 x i64> +; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP22:%.*]] = icmp ne i64 [[TMP26]], -9223372036854775808 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT23:%.*]] = select i1 [[RDX_SELECT_CMP22]], i64 [[TMP26]], i64 [[II:%.*]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC4: scalar.ph: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC4: for.body: +; CHECK-VF4IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX24]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC4-NEXT: [[TMP27:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP28]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP27]]) +; CHECK-VF4IC4-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP27]] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 
[[IDX_011]] +; CHECK-VF4IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF4IC4: exit: +; CHECK-VF4IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF1IC4-LABEL: @smax_idx( +; CHECK-VF1IC4-NEXT: entry: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC4: vector.ph: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC4: vector.body: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ [[MM:%.*]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = 
phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI]], i64 [[TMP8]]) +; CHECK-VF1IC4-NEXT: [[TMP13]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI1]], i64 [[TMP9]]) +; CHECK-VF1IC4-NEXT: [[TMP14]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI2]], i64 [[TMP10]]) +; CHECK-VF1IC4-NEXT: [[TMP15]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI3]], i64 [[TMP11]]) +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = icmp slt i64 [[VEC_PHI]], [[TMP8]] +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp slt i64 [[VEC_PHI1]], [[TMP9]] +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp slt i64 [[VEC_PHI2]], [[TMP10]] +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp slt i64 [[VEC_PHI3]], [[TMP11]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP16]], i64 [[TMP0]], i64 [[VEC_PHI4]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i64 [[TMP1]], i64 [[VEC_PHI5]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i64 [[TMP2]], 
i64 [[VEC_PHI6]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i64 [[TMP3]], i64 [[VEC_PHI7]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-VF1IC4: middle.block: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP12]], i64 [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select i1 [[RDX_MINMAX_CMP8]], i64 [[RDX_MINMAX_SELECT]], i64 [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select i1 [[RDX_MINMAX_CMP10]], i64 [[RDX_MINMAX_SELECT9]], i64 [[TMP15]] +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[TMP21]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP12:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT13:%.*]] = select i1 [[RDX_SELECT_CMP12]], i64 [[RDX_SELECT]], i64 [[TMP22]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT15:%.*]] = select i1 [[RDX_SELECT_CMP14]], i64 [[RDX_SELECT13]], i64 [[TMP23]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne i64 [[RDX_SELECT15]], -9223372036854775808 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT17:%.*]] = select i1 [[RDX_SELECT_CMP16]], i64 [[RDX_SELECT15]], i64 [[II:%.*]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC4: 
scalar.ph: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF1IC4: for.body: +; CHECK-VF1IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX18]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP26]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP25]] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF1IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-VF1IC4: exit: +; CHECK-VF1IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP26]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF1IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, 
%entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp slt i64 %max.09, %0 + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %1, ptr %res_max + ret i64 %spec.select7 +} + +; +; Check the different order of reduction phis. +; +define i64 @smax_idx_inverted_phi(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-VF4IC1-LABEL: @smax_idx_inverted_phi( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ 
[[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI1]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp slt <4 x i64> [[VEC_PHI1]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-VF4IC1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[TMP3]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[TMP5]], <4 x i64> +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II:%.*]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; 
CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC1: for.body: +; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP10]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP9]]) +; CHECK-VF4IC1-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP9]] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC1-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF4IC4-LABEL: @smax_idx_inverted_phi( +; CHECK-VF4IC4-NEXT: entry: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = 
icmp ult i64 [[N:%.*]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC4: vector.ph: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC4: vector.body: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI8:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD2:%.*]] 
= add <4 x i64> [[STEP_ADD1]], +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP9]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP10]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP11]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP12]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI7]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC4-NEXT: [[TMP13]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI8]], <4 x i64> [[WIDE_LOAD11]]) +; CHECK-VF4IC4-NEXT: [[TMP14]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI9]], <4 x i64> [[WIDE_LOAD12]]) +; CHECK-VF4IC4-NEXT: [[TMP15]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI10]], <4 x i64> [[WIDE_LOAD13]]) +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp slt <4 x i64> [[VEC_PHI7]], [[WIDE_LOAD]] +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp slt <4 x i64> [[VEC_PHI8]], [[WIDE_LOAD11]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp slt <4 x i64> 
[[VEC_PHI9]], [[WIDE_LOAD12]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp slt <4 x i64> [[VEC_PHI10]], [[WIDE_LOAD13]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP16]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP17]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI4]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = select <4 x i1> [[TMP18]], <4 x i64> [[STEP_ADD1]], <4 x i64> [[VEC_PHI5]] +; CHECK-VF4IC4-NEXT: [[TMP23]] = select <4 x i1> [[TMP19]], <4 x i64> [[STEP_ADD2]], <4 x i64> [[VEC_PHI6]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF4IC4: middle.block: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP12]], <4 x i64> [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP14:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT15:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP14]], <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP16:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT17:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP16]], <4 x i64> [[RDX_MINMAX_SELECT15]], <4 x i64> [[TMP15]] +; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX_SELECT17]]) +; CHECK-VF4IC4-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP25]], i64 0 +; CHECK-VF4IC4-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: [[MASK_CMP:%.*]] = 
icmp eq <4 x i64> [[DOTSPLAT]], [[RDX_MINMAX_SELECT17]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP20]], <4 x i64> [[TMP21]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP18:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT19:%.*]] = select <4 x i1> [[RDX_SELECT_CMP18]], <4 x i64> [[RDX_SELECT]], <4 x i64> [[TMP22]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP20:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT21:%.*]] = select <4 x i1> [[RDX_SELECT_CMP20]], <4 x i64> [[RDX_SELECT19]], <4 x i64> [[TMP23]] +; CHECK-VF4IC4-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[RDX_SELECT21]], <4 x i64> +; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP22:%.*]] = icmp ne i64 [[TMP26]], -9223372036854775808 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT23:%.*]] = select i1 [[RDX_SELECT_CMP22]], i64 [[TMP26]], i64 [[II:%.*]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC4: scalar.ph: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC4: for.body: +; CHECK-VF4IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX24]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; 
CHECK-VF4IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC4-NEXT: [[TMP27:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP28]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP27]]) +; CHECK-VF4IC4-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP27]] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF4IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF4IC4: exit: +; CHECK-VF4IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF1IC4-LABEL: @smax_idx_inverted_phi( +; CHECK-VF1IC4-NEXT: entry: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC4: vector.ph: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC4: vector.body: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ 
-9223372036854775808, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i64 [ [[MM:%.*]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI4]], i64 [[TMP8]]) +; CHECK-VF1IC4-NEXT: [[TMP13]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI5]], i64 [[TMP9]]) +; CHECK-VF1IC4-NEXT: [[TMP14]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI6]], i64 [[TMP10]]) +; CHECK-VF1IC4-NEXT: [[TMP15]] = tail call i64 @llvm.smax.i64(i64 
[[VEC_PHI7]], i64 [[TMP11]]) +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = icmp slt i64 [[VEC_PHI4]], [[TMP8]] +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp slt i64 [[VEC_PHI5]], [[TMP9]] +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp slt i64 [[VEC_PHI6]], [[TMP10]] +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp slt i64 [[VEC_PHI7]], [[TMP11]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP16]], i64 [[TMP0]], i64 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i64 [[TMP1]], i64 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i64 [[TMP2]], i64 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i64 [[TMP3]], i64 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-VF1IC4: middle.block: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP12]], i64 [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select i1 [[RDX_MINMAX_CMP8]], i64 [[RDX_MINMAX_SELECT]], i64 [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select i1 [[RDX_MINMAX_CMP10]], i64 [[RDX_MINMAX_SELECT9]], i64 [[TMP15]] +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[TMP21]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP12:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT13:%.*]] = select i1 
[[RDX_SELECT_CMP12]], i64 [[RDX_SELECT]], i64 [[TMP22]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT15:%.*]] = select i1 [[RDX_SELECT_CMP14]], i64 [[RDX_SELECT13]], i64 [[TMP23]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne i64 [[RDX_SELECT15]], -9223372036854775808 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT17:%.*]] = select i1 [[RDX_SELECT_CMP16]], i64 [[RDX_SELECT15]], i64 [[II:%.*]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC4: scalar.ph: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF1IC4: for.body: +; CHECK-VF1IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX18]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP26]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = icmp slt i64 [[MAX_09]], [[TMP25]] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF1IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; 
CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-VF1IC4: exit: +; CHECK-VF1IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP26]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF1IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] ;; + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] ;; + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp slt i64 %max.09, %0 + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %1, ptr %res_max + ret i64 %spec.select7 +} + +; Check if it is a MMI when smax is not used outside the loop. +; +; Currently at the end, it will check if smax has exitInstruction. +; But in fact MMI should be possible to use the exitInstruction of +; SelectICmp be the exitInstruction. 
+; +define i64 @smax_idx_max_no_exit_user(ptr nocapture readonly %a, i64 %mm, i64 %ii, i64 %n) { +; CHECK-LABEL: @smax_idx_max_no_exit_user( +; CHECK-NOT: vector.body: +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp slt i64 %max.09, %0 + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ; %1 has no external users + ret i64 %spec.select7 +} + +; Check smax implemented in terms of select(cmp()). +; +; Currently SelectICmp does not support icmp with multiple users. +; It may be possible to reuse some of the methods in Combination pass to check +; whether icmp can be copied. 
+; +define i64 @smax_idx_select_cmp(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-LABEL: @smax_idx_select_cmp( +; CHECK-NOT: vector.body: +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %spec.select, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %cmp1 = icmp slt i64 %max.09, %0 ;; + %spec.select = select i1 %cmp1, i64 %0, i64 %max.09 ;; + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %spec.select, ptr %res_max + ret i64 %spec.select7 +} + +; +; Check sge case. +; +define i64 @smax_idx_inverted_pred(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-VF4IC1-LABEL: @smax_idx_inverted_pred( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] 
], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI1]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-VF4IC1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[TMP3]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[TMP5]], <4 x i64> +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 
[[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II:%.*]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC1: for.body: +; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP10]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP9]]) +; CHECK-VF4IC1-NEXT: [[CMP1:%.*]] = icmp sge i64 [[TMP9]], [[MAX_09]] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; 
CHECK-VF4IC1-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC1-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF4IC4-LABEL: @smax_idx_inverted_pred( +; CHECK-VF4IC4-NEXT: entry: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC4: vector.ph: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC4: vector.body: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI8:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: 
[[VEC_PHI10:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP9]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP10]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP11]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP12]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC4-NEXT: [[TMP13]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI4]], <4 x i64> [[WIDE_LOAD11]]) +; CHECK-VF4IC4-NEXT: [[TMP14]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI5]], <4 x i64> [[WIDE_LOAD12]]) +; CHECK-VF4IC4-NEXT: [[TMP15]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> 
[[VEC_PHI6]], <4 x i64> [[WIDE_LOAD13]]) +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD11]], [[VEC_PHI4]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD12]], [[VEC_PHI5]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp sge <4 x i64> [[WIDE_LOAD13]], [[VEC_PHI6]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP16]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI7]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP17]], <4 x i64> [[STEP_ADD]], <4 x i64> [[VEC_PHI8]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = select <4 x i1> [[TMP18]], <4 x i64> [[STEP_ADD1]], <4 x i64> [[VEC_PHI9]] +; CHECK-VF4IC4-NEXT: [[TMP23]] = select <4 x i1> [[TMP19]], <4 x i64> [[STEP_ADD2]], <4 x i64> [[VEC_PHI10]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF4IC4: middle.block: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP12]], <4 x i64> [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP14:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT15:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP14]], <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP16:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT17:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP16]], <4 x i64> [[RDX_MINMAX_SELECT15]], <4 x i64> [[TMP15]] +; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX_SELECT17]]) 
+; CHECK-VF4IC4-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP25]], i64 0 +; CHECK-VF4IC4-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[RDX_MINMAX_SELECT17]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP20]], <4 x i64> [[TMP21]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP18:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT19:%.*]] = select <4 x i1> [[RDX_SELECT_CMP18]], <4 x i64> [[RDX_SELECT]], <4 x i64> [[TMP22]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP20:%.*]] = icmp sge <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT21:%.*]] = select <4 x i1> [[RDX_SELECT_CMP20]], <4 x i64> [[RDX_SELECT19]], <4 x i64> [[TMP23]] +; CHECK-VF4IC4-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[RDX_SELECT21]], <4 x i64> +; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP22:%.*]] = icmp ne i64 [[TMP26]], -9223372036854775808 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT23:%.*]] = select i1 [[RDX_SELECT_CMP22]], i64 [[TMP26]], i64 [[II:%.*]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC4: scalar.ph: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX24:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC4: for.body: +; 
CHECK-VF4IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX24]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC4-NEXT: [[TMP27:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP28]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP27]]) +; CHECK-VF4IC4-NEXT: [[CMP1:%.*]] = icmp sge i64 [[TMP27]], [[MAX_09]] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF4IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF4IC4: exit: +; CHECK-VF4IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT23]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF1IC4-LABEL: @smax_idx_inverted_pred( +; CHECK-VF1IC4-NEXT: entry: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC4: vector.ph: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC4: vector.body: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = 
phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ [[MM:%.*]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI]], i64 [[TMP8]]) +; 
CHECK-VF1IC4-NEXT: [[TMP13]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI1]], i64 [[TMP9]]) +; CHECK-VF1IC4-NEXT: [[TMP14]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI2]], i64 [[TMP10]]) +; CHECK-VF1IC4-NEXT: [[TMP15]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI3]], i64 [[TMP11]]) +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = icmp sge i64 [[TMP8]], [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sge i64 [[TMP9]], [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sge i64 [[TMP10]], [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sge i64 [[TMP11]], [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP16]], i64 [[TMP0]], i64 [[VEC_PHI4]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i64 [[TMP1]], i64 [[VEC_PHI5]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i64 [[TMP2]], i64 [[VEC_PHI6]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i64 [[TMP3]], i64 [[VEC_PHI7]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-VF1IC4: middle.block: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP12]], i64 [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select i1 [[RDX_MINMAX_CMP8]], i64 [[RDX_MINMAX_SELECT]], i64 [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select i1 [[RDX_MINMAX_CMP10]], i64 [[RDX_MINMAX_SELECT9]], i64 [[TMP15]] +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp sge i64 [[TMP12]], [[TMP13]] +; 
CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP20]], i64 [[TMP21]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP12:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT13:%.*]] = select i1 [[RDX_SELECT_CMP12]], i64 [[RDX_SELECT]], i64 [[TMP22]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP14:%.*]] = icmp sge i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT15:%.*]] = select i1 [[RDX_SELECT_CMP14]], i64 [[RDX_SELECT13]], i64 [[TMP23]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne i64 [[RDX_SELECT15]], -9223372036854775808 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT17:%.*]] = select i1 [[RDX_SELECT_CMP16]], i64 [[RDX_SELECT15]], i64 [[II:%.*]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC4: scalar.ph: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF1IC4: for.body: +; CHECK-VF1IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX18]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP26]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = icmp sge i64 [[TMP25]], [[MAX_09]] +; CHECK-VF1IC4-NEXT: 
[[SPEC_SELECT7]] = select i1 [[CMP1]], i64 [[INDVARS_IV]], i64 [[IDX_011]] +; CHECK-VF1IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK-VF1IC4: exit: +; CHECK-VF1IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP26]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT17]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF1IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp sge i64 %0, %max.09 ;; + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %1, ptr %res_max + ret i64 %spec.select7 +} + +; +; In such cases, the last index should be extracted. 
+; +define i64 @smax_idx_extract_last(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-VF4IC1-LABEL: @smax_idx_extract_last( +; CHECK-VF4IC1-NEXT: entry: +; CHECK-VF4IC1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF4IC1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC1: vector.ph: +; CHECK-VF4IC1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF4IC1-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC1-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC1: vector.body: +; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 +; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP3]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC1-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-VF4IC1-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x i64> [[VEC_PHI1]], <4 x i64> [[VEC_IND]] +; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw 
i64 [[INDEX]], 4 +; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC1: middle.block: +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP7]], i64 0 +; CHECK-VF4IC1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC1-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[TMP3]] +; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC1-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[TMP5]], <4 x i64> +; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP8]], -9223372036854775808 +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP8]], i64 [[II:%.*]] +; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC1: scalar.ph: +; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX2:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC1: for.body: +; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[IDX_011:%.*]] = phi i64 [ 
[[BC_MERGE_RDX2]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC1-NEXT: [[TMP10]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP9]]) +; CHECK-VF4IC1-NEXT: [[CMP1_NOT:%.*]] = icmp sgt i64 [[MAX_09]], [[TMP9]] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1_NOT]], i64 [[IDX_011]], i64 [[INDVARS_IV]] +; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC1: exit: +; CHECK-VF4IC1-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC1-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF4IC4-LABEL: @smax_idx_extract_last( +; CHECK-VF4IC4-NEXT: entry: +; CHECK-VF4IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; CHECK-VF4IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF4IC4: vector.ph: +; CHECK-VF4IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; CHECK-VF4IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[MM:%.*]], i64 0 +; CHECK-VF4IC4-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i64> [[MINMAX_IDENT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF4IC4: vector.body: +; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i64> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI8:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI9:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD1:%.*]] = add <4 x i64> [[STEP_ADD]], +; CHECK-VF4IC4-NEXT: [[STEP_ADD2:%.*]] = add <4 x i64> [[STEP_ADD1]], +; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF4IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 8 +; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 12 +; CHECK-VF4IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF4IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF4IC4-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, 
ptr [[TMP4]], i32 0 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP8]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD11:%.*]] = load <4 x i64>, ptr [[TMP9]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 8 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i64>, ptr [[TMP10]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 12 +; CHECK-VF4IC4-NEXT: [[WIDE_LOAD13:%.*]] = load <4 x i64>, ptr [[TMP11]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP12]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI]], <4 x i64> [[WIDE_LOAD]]) +; CHECK-VF4IC4-NEXT: [[TMP13]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI4]], <4 x i64> [[WIDE_LOAD11]]) +; CHECK-VF4IC4-NEXT: [[TMP14]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI5]], <4 x i64> [[WIDE_LOAD12]]) +; CHECK-VF4IC4-NEXT: [[TMP15]] = call <4 x i64> @llvm.smax.v4i64(<4 x i64> [[VEC_PHI6]], <4 x i64> [[WIDE_LOAD13]]) +; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = icmp sgt <4 x i64> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = icmp sgt <4 x i64> [[VEC_PHI4]], [[WIDE_LOAD11]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = icmp sgt <4 x i64> [[VEC_PHI5]], [[WIDE_LOAD12]] +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = icmp sgt <4 x i64> [[VEC_PHI6]], [[WIDE_LOAD13]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = select <4 x i1> [[TMP16]], <4 x i64> [[VEC_PHI7]], <4 x i64> [[VEC_IND]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = select <4 x i1> [[TMP17]], <4 x i64> [[VEC_PHI8]], <4 x i64> [[STEP_ADD]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = select <4 x i1> [[TMP18]], <4 x i64> [[VEC_PHI9]], <4 x i64> [[STEP_ADD1]] +; CHECK-VF4IC4-NEXT: [[TMP23]] = select <4 x i1> [[TMP19]], <4 x i64> [[VEC_PHI10]], <4 x i64> [[STEP_ADD2]] +; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD2]], +; 
CHECK-VF4IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF4IC4: middle.block: +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <4 x i64> [[TMP12]], [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP12]], <4 x i64> [[TMP13]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP14:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT15:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP14]], <4 x i64> [[RDX_MINMAX_SELECT]], <4 x i64> [[TMP14]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_CMP16:%.*]] = icmp sgt <4 x i64> [[RDX_MINMAX_SELECT15]], [[TMP15]] +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX_SELECT17:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP16]], <4 x i64> [[RDX_MINMAX_SELECT15]], <4 x i64> [[TMP15]] +; CHECK-VF4IC4-NEXT: [[TMP25:%.*]] = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> [[RDX_MINMAX_SELECT17]]) +; CHECK-VF4IC4-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP25]], i64 0 +; CHECK-VF4IC4-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i64> [[DOTSPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-VF4IC4-NEXT: [[MASK_CMP:%.*]] = icmp eq <4 x i64> [[DOTSPLAT]], [[RDX_MINMAX_SELECT17]] +; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i64> [[TMP20]], <4 x i64> [[TMP21]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT18:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP14]], <4 x i64> [[RDX_SELECT]], <4 x i64> [[TMP22]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT19:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP16]], <4 x i64> [[RDX_SELECT18]], <4 x i64> [[TMP23]] +; CHECK-VF4IC4-NEXT: [[MASK_SELECT:%.*]] = select <4 x i1> [[MASK_CMP]], <4 x i64> [[RDX_SELECT19]], <4 x i64> +; CHECK-VF4IC4-NEXT: [[TMP26:%.*]] = call i64 
@llvm.vector.reduce.smax.v4i64(<4 x i64> [[MASK_SELECT]]) +; CHECK-VF4IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[TMP26]], -9223372036854775808 +; CHECK-VF4IC4-NEXT: [[RDX_SELECT20:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[TMP26]], i64 [[II:%.*]] +; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF4IC4: scalar.ph: +; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX21:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT20]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF4IC4: for.body: +; CHECK-VF4IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX21]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF4IC4-NEXT: [[TMP27:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF4IC4-NEXT: [[TMP28]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP27]]) +; CHECK-VF4IC4-NEXT: [[CMP1_NOT:%.*]] = icmp sgt i64 [[MAX_09]], [[TMP27]] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1_NOT]], i64 [[IDX_011]], i64 [[INDVARS_IV]] +; CHECK-VF4IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF4IC4: exit: +; CHECK-VF4IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ] +; 
CHECK-VF4IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT20]], [[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF4IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +; CHECK-VF1IC4-LABEL: @smax_idx_extract_last( +; CHECK-VF1IC4-NEXT: entry: +; CHECK-VF1IC4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 +; CHECK-VF1IC4-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-VF1IC4: vector.ph: +; CHECK-VF1IC4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-VF1IC4-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-VF1IC4-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-VF1IC4: vector.body: +; CHECK-VF1IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i64 [ [[MM:%.*]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i64 [ [[MM]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i64 [ -9223372036854775808, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; CHECK-VF1IC4-NEXT: 
[[TMP3:%.*]] = add i64 [[INDEX]], 3 +; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP4]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP5]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = load i64, ptr [[TMP6]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP7]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP12]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI]], i64 [[TMP8]]) +; CHECK-VF1IC4-NEXT: [[TMP13]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI1]], i64 [[TMP9]]) +; CHECK-VF1IC4-NEXT: [[TMP14]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI2]], i64 [[TMP10]]) +; CHECK-VF1IC4-NEXT: [[TMP15]] = tail call i64 @llvm.smax.i64(i64 [[VEC_PHI3]], i64 [[TMP11]]) +; CHECK-VF1IC4-NEXT: [[TMP16:%.*]] = icmp sgt i64 [[VEC_PHI]], [[TMP8]] +; CHECK-VF1IC4-NEXT: [[TMP17:%.*]] = icmp sgt i64 [[VEC_PHI1]], [[TMP9]] +; CHECK-VF1IC4-NEXT: [[TMP18:%.*]] = icmp sgt i64 [[VEC_PHI2]], [[TMP10]] +; CHECK-VF1IC4-NEXT: [[TMP19:%.*]] = icmp sgt i64 [[VEC_PHI3]], [[TMP11]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = select i1 [[TMP16]], i64 [[VEC_PHI4]], i64 [[TMP0]] +; CHECK-VF1IC4-NEXT: [[TMP21]] = select i1 [[TMP17]], i64 [[VEC_PHI5]], i64 [[TMP1]] +; CHECK-VF1IC4-NEXT: [[TMP22]] = select i1 [[TMP18]], i64 [[VEC_PHI6]], i64 [[TMP2]] +; CHECK-VF1IC4-NEXT: [[TMP23]] = select i1 [[TMP19]], i64 [[VEC_PHI7]], i64 [[TMP3]] +; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-VF1IC4: middle.block: +; 
CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt i64 [[TMP12]], [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP12]], i64 [[TMP13]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT]], [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select i1 [[RDX_MINMAX_CMP8]], i64 [[RDX_MINMAX_SELECT]], i64 [[TMP14]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_CMP10:%.*]] = icmp sgt i64 [[RDX_MINMAX_SELECT9]], [[TMP15]] +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX_SELECT11:%.*]] = select i1 [[RDX_MINMAX_CMP10]], i64 [[RDX_MINMAX_SELECT9]], i64 [[TMP15]] +; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i64 [[TMP20]], i64 [[TMP21]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT12:%.*]] = select i1 [[RDX_MINMAX_CMP8]], i64 [[RDX_SELECT]], i64 [[TMP22]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT13:%.*]] = select i1 [[RDX_MINMAX_CMP10]], i64 [[RDX_SELECT12]], i64 [[TMP23]] +; CHECK-VF1IC4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i64 [[RDX_SELECT13]], -9223372036854775808 +; CHECK-VF1IC4-NEXT: [[RDX_SELECT14:%.*]] = select i1 [[RDX_SELECT_CMP]], i64 [[RDX_SELECT13]], i64 [[II:%.*]] +; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-VF1IC4: scalar.ph: +; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[MM]], [[ENTRY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX15:%.*]] = phi i64 [ [[II]], [[ENTRY]] ], [ [[RDX_SELECT14]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-VF1IC4: for.body: +; CHECK-VF1IC4-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[MAX_09:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], 
[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[IDX_011:%.*]] = phi i64 [ [[BC_MERGE_RDX15]], [[SCALAR_PH]] ], [ [[SPEC_SELECT7:%.*]], [[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX]], align 4 +; CHECK-VF1IC4-NEXT: [[TMP26]] = tail call i64 @llvm.smax.i64(i64 [[MAX_09]], i64 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[CMP1_NOT:%.*]] = icmp sgt i64 [[MAX_09]], [[TMP25]] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7]] = select i1 [[CMP1_NOT]], i64 [[IDX_011]], i64 [[INDVARS_IV]] +; CHECK-VF1IC4-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-VF1IC4: exit: +; CHECK-VF1IC4-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP26]], [[FOR_BODY]] ], [ [[RDX_MINMAX_SELECT11]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT7_LCSSA:%.*]] = phi i64 [ [[SPEC_SELECT7]], [[FOR_BODY]] ], [ [[RDX_SELECT14]], [[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: store i64 [[DOTLCSSA]], ptr [[RES_MAX:%.*]], align 4 +; CHECK-VF1IC4-NEXT: ret i64 [[SPEC_SELECT7_LCSSA]] +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1.not = icmp sgt i64 %max.09, %0 + %spec.select7 = select i1 %cmp1.not, i64 %idx.011, i64 %indvars.iv + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %1, ptr %res_max + ret i64 %spec.select7 +} + +; +; The 
operands of the smax intrinsic and the icmp are not the same, so it cannot be recognized as MMI. +; +; FIXME: this case should not be vectorized. We have to check the operands of intrinsic and icmp. +define i64 @smax_idx_not_vec_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-LABEL: @smax_idx_not_vec_1( +; CHECK-NOT: vector.body: +; + entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %2, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %arrayidx.01 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv + %1 = load i64, ptr %arrayidx.01 + %2 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp slt i64 %max.09, %1 ;; + %spec.select7 = select i1 %cmp1, i64 %indvars.iv, i64 %idx.011 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %2, ptr %res_max + ret i64 %spec.select7 +} + +; +; It cannot be recognized as MMI when the operand of index select is not an induction variable.
+; +define i64 @smax_idx_not_vec_2(ptr nocapture readonly %a, i64 %mm, i64 %ii, ptr nocapture writeonly %res_max, i64 %n) { +; CHECK-LABEL: @smax_idx_not_vec_2( +; CHECK-NOT: vector.body: +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %max.09 = phi i64 [ %mm, %entry ], [ %1, %for.body ] + %idx.011 = phi i64 [ %ii, %entry ], [ %spec.select7, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv + %0 = load i64, ptr %arrayidx + %1 = tail call i64 @llvm.smax.i64(i64 %max.09, i64 %0) + %cmp1 = icmp slt i64 %max.09, %0 + %spec.select7 = select i1 %cmp1, i64 123, i64 %idx.011 ;; + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + store i64 %1, ptr %res_max + ret i64 %spec.select7 +} + +declare i64 @llvm.smax.i64(i64, i64)