diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -123,7 +123,7 @@
   /// the returned struct.
   static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I,
                                     RecurKind Kind, InstDesc &Prev,
-                                    FastMathFlags FuncFMF);
+                                    FastMathFlags FuncFMF, ScalarEvolution *SE);
 
   /// Returns true if instruction I has multiple uses in Insts
   static bool hasMultipleUsesOf(Instruction *I,
@@ -148,7 +148,8 @@
   /// value. \p Prev specifies the description of an already processed select
   /// instruction, so its corresponding cmp can be matched to it.
   static InstDesc isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
-                                     Instruction *I, InstDesc &Prev);
+                                     Instruction *I, InstDesc &Prev,
+                                     ScalarEvolution *SE);
 
   /// Returns a struct describing if the instruction is a
   /// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -15,6 +15,7 @@
 
 #include "llvm/Analysis/IVDescriptors.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 
 namespace llvm {
@@ -398,7 +399,8 @@
                                     const TargetTransformInfo *TTI,
                                     Value *Src,
                                     const RecurrenceDescriptor &Desc,
-                                    PHINode *OrigPhi);
+                                    PHINode *OrigPhi, Loop *OrigLoop,
+                                    Loop::LoopBounds::Direction Direction);
 
 /// Create a generic target reduction using a recurrence descriptor \p Desc
 /// The target is queried to determine if intrinsics or shuffle sequences are
@@ -406,7 +408,10 @@
 /// Fast-math-flags are propagated using the RecurrenceDescriptor.
 Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
                              const RecurrenceDescriptor &Desc, Value *Src,
-                             PHINode *OrigPhi = nullptr);
+                             PHINode *OrigPhi = nullptr,
+                             Loop *OrigLoop = nullptr,
+                             Loop::LoopBounds::Direction Direction =
+                                 Loop::LoopBounds::Direction::Unknown);
 
 /// Create an ordered reduction intrinsic using the given recurrence
 /// descriptor \p Desc.
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -375,7 +375,7 @@
     // type-promoted).
     if (Cur != Start) {
       ReduxDesc =
-          isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
+          isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
       ExactFPMathInst = ExactFPMathInst == nullptr
                             ? ReduxDesc.getExactFPMathInst()
                             : ExactFPMathInst;
@@ -488,7 +488,7 @@
                ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                  !isa<SelectInst>(UI)) ||
                 (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
-                 !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
+                 !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal, SE)
                       .isRecurrence() &&
                  !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
       return false;
@@ -606,7 +606,9 @@
   return true;
 }
 
-// We are looking for loops that do something like this:
+// We are looking for two patterns.
+//
+// The first is loops that do something like this:
 //   int r = 0;
 //   for (int i = 0; i < n; i++) {
 //     if (src[i] > 3)
@@ -627,9 +629,25 @@
 // across-vector reduction after the loop simply involves choosing the start
 // value if nothing changed (0 in the example above) or the other selected
 // value (3 in the example above).
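+//
+// For example (an illustrative sketch, assuming a vectorization factor of 4):
+// if the vector of selected values comes out of the loop as <0, 3, 0, 3> with
+// start value 0, the lane-wise icmp ne against the splat of 0 yields
+// <0, 1, 0, 1>; its or-reduce is true, so the final result is the selected
+// value 3.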
+//
+// The second is loops that do something like this:
+//   int r = 0;
+//   for (int i = 0; i < n; i++) {
+//     if (src[i] > 3)
+//       r = i;
+//   }
+// where the reduction value (r) can take one of many values: the start value
+// (0 in the example above) or any value of the induction variable i for which
+// the condition (src[i] > 3) held; the scalar loop ends with the last such i.
+//
+// In general we can support vectorization of loops where 'r' is assigned a
+// loop-variant value, provided that value is an add-recurrence of the loop
+// with a constant or loop-invariant step. As before, all we care about at the
+// end of the loop is whether any lane of the selected vector differs from the
+// start value. The final across-vector reduction masks out the unselected
+// lanes and then applies a max-reduce or min-reduce, depending on whether i
+// is increasing or decreasing in the loop.
 RecurrenceDescriptor::InstDesc
 RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
-                                         Instruction *I, InstDesc &Prev) {
+                                         Instruction *I, InstDesc &Prev,
+                                         ScalarEvolution *SE) {
   // We must handle the select(cmp(),x,y) as a single instruction. Advance to
   // the select.
   CmpInst::Predicate Pred;
@@ -646,6 +664,9 @@
   SelectInst *SI = cast<SelectInst>(I);
   Value *NonPhi = nullptr;
 
+  // In the loop-invariant case, we are looking for selects of the form:
+  //   select(cmp(), phi, loop_invariant) or
+  //   select(cmp(), loop_invariant, phi)
   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
     NonPhi = SI->getFalseValue();
   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
@@ -653,11 +674,24 @@
   else
     return InstDesc(false, I);
 
-  // We are looking for selects of the form:
-  //   select(cmp(), phi, loop_invariant) or
-  //   select(cmp(), loop_invariant, phi)
-  if (!Loop->isLoopInvariant(NonPhi))
-    return InstDesc(false, I);
+  if (!Loop->isLoopInvariant(NonPhi)) {
+    // If the operand is not loop-invariant, we need to know the direction of
+    // the loop in order to pick min-reduce or max-reduce in the epilogue.
+    auto Bounds = SE ? Loop->getBounds(*SE) : std::nullopt;
+    if (!Bounds ||
+        Bounds->getDirection() == Loop::LoopBounds::Direction::Unknown)
+      return InstDesc(false, I);
+
+    // A loop-variant operand is only supported if it is an AddRec expression
+    // whose step is constant or loop-invariant. E.g., for 'r = i' above the
+    // operand is the induction variable, whose SCEV is {0,+,1}<loop> with the
+    // constant step 1.
+    if (!SE->isSCEVable(NonPhi->getType()))
+      return InstDesc(false, I);
+    else if (auto *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(NonPhi))) {
+      const SCEV *Step = AR->getStepRecurrence(*SE);
+      const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+      if (!ConstStep && !SE->isLoopInvariant(Step, Loop))
+        return InstDesc(false, I);
+    } else
+      return InstDesc(false, I);
+  }
 
   return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
                                                      : RecurKind::SelectFCmp);
@@ -762,10 +796,9 @@
   return InstDesc(true, SI);
 }
 
-RecurrenceDescriptor::InstDesc
-RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
-                                        Instruction *I, RecurKind Kind,
-                                        InstDesc &Prev, FastMathFlags FuncFMF) {
+RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
+    Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
+    FastMathFlags FuncFMF, ScalarEvolution *SE) {
   assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
   switch (I->getOpcode()) {
   default:
@@ -800,7 +833,7 @@
   case Instruction::ICmp:
   case Instruction::Call:
     if (isSelectCmpRecurrenceKind(Kind))
-      return isSelectCmpPattern(L, OrigPhi, I, Prev);
+      return isSelectCmpPattern(L, OrigPhi, I, Prev, SE);
     if (isIntMinMaxRecurrenceKind(Kind) ||
         (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
           (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1021,11 +1021,10 @@
   return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
 }
 
-Value *llvm::createSelectCmpTargetReduction(IRBuilderBase &Builder,
-                                            const TargetTransformInfo *TTI,
-                                            Value *Src,
-                                            const RecurrenceDescriptor &Desc,
-                                            PHINode *OrigPhi) {
+Value *llvm::createSelectCmpTargetReduction(
+    IRBuilderBase &Builder, const TargetTransformInfo *TTI, Value *Src,
+    const RecurrenceDescriptor &Desc, PHINode *OrigPhi, Loop *OrigLoop,
+    Loop::LoopBounds::Direction Direction) {
   assert(RecurrenceDescriptor::isSelectCmpRecurrenceKind(
              Desc.getRecurrenceKind()) &&
          "Unexpected reduction kind");
@@ -1053,11 +1052,55 @@
   // we want to reduce.
   ElementCount EC = cast<VectorType>(Src->getType())->getElementCount();
   Value *Right = Builder.CreateVectorSplat(EC, InitVal);
-  Value *Cmp =
+  Value *CmpVector =
       Builder.CreateCmp(CmpInst::ICMP_NE, Src, Right, "rdx.select.cmp");
+  Value *Cmp = Builder.CreateOrReduce(CmpVector);
+
+  if (!OrigLoop->isLoopInvariant(NewVal)) {
+    // If NewVal isn't loop-invariant, consider the following example:
+    //   int src[n] = {4, 5, 2};
+    //   int r = 331;
+    //   for (int i = 0; i < n; i++) {
+    //     if (src[i] > 3)
+    //       r = i;
+    //   }
+    //   return r;
+    // We vectorize this case as follows:
+    // 1. Create a splat of the int_min/int_max value, depending on the loop
+    //    direction.
+    // 2. Src, the vectorized reduction value, holds the selected index in
+    //    each lane, or the start value in lanes where no assignment took
+    //    place: {0, 1, 331, 331, ...}.
+    // 3. Right is a splat of the start value: {331, 331, 331, 331, ...}.
+    // 4. CmpVector (Src != Right) marks the lanes that were assigned to:
+    //    {1, 1, 0, 0, ...}.
+    // 5. Select between Src and the int_min/int_max splat using CmpVector.
+    // 6. Apply a max-reduce/min-reduce to the result of the select.
+    // 7. Use the existing Cmp (the or-reduce of CmpVector), which determines
+    //    whether any assignment took place in the loop, to select between the
+    //    result of the max-reduce/min-reduce and the initial value (InitVal).
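+    //
+    // Walking the example through steps 4-7 (an illustrative sketch, assuming
+    // VF = 4 and a signed, increasing induction): CmpVector = {1, 1, 0, 0},
+    // the select in step 5 yields {0, 1, INT_MIN, INT_MIN}, the max-reduce in
+    // step 6 returns 1, and step 7 picks 1 over 331 because Cmp is true: 1 is
+    // the last index for which src[i] > 3 held.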
+    assert(Direction != Loop::LoopBounds::Direction::Unknown &&
+           "Unknown loop direction");
+    auto IdxTy = cast<IntegerType>(InitVal->getType());
+    if (Direction == Loop::LoopBounds::Direction::Increasing) {
+      auto MinValue = Desc.isSigned()
+                          ? APInt::getSignedMinValue(IdxTy->getBitWidth())
+                          : APInt::getMinValue(IdxTy->getBitWidth());
+      Value *MinSplat =
+          Builder.CreateVectorSplat(EC, ConstantInt::get(IdxTy, MinValue));
+      Value *Mask =
+          Builder.CreateSelect(CmpVector, Src, MinSplat, "rdx.select.mask");
+      NewVal = Builder.CreateIntMaxReduce(Mask, Desc.isSigned());
+    } else {
+      auto MaxValue = Desc.isSigned()
+                          ? APInt::getSignedMaxValue(IdxTy->getBitWidth())
+                          : APInt::getMaxValue(IdxTy->getBitWidth());
+      Value *MaxSplat =
+          Builder.CreateVectorSplat(EC, ConstantInt::get(IdxTy, MaxValue));
+      Value *Mask =
+          Builder.CreateSelect(CmpVector, Src, MaxSplat, "rdx.select.mask");
+      NewVal = Builder.CreateIntMinReduce(Mask, Desc.isSigned());
+    }
+  }
 
-  // If any predicate is true it means that we want to select the new value.
-  Cmp = Builder.CreateOrReduce(Cmp);
   return Builder.CreateSelect(Cmp, NewVal, InitVal, "rdx.select");
 }
 
@@ -1102,7 +1145,8 @@
 Value *llvm::createTargetReduction(IRBuilderBase &B,
                                    const TargetTransformInfo *TTI,
                                    const RecurrenceDescriptor &Desc, Value *Src,
-                                   PHINode *OrigPhi) {
+                                   PHINode *OrigPhi, Loop *OrigLoop,
+                                   Loop::LoopBounds::Direction Direction) {
   // TODO: Support in-order reductions based on the recurrence descriptor.
   // All ops in the reduction inherit fast-math-flags from the recurrence
   // descriptor.
@@ -1111,7 +1155,8 @@
 
   RecurKind RK = Desc.getRecurrenceKind();
   if (RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK))
-    return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi);
+    return createSelectCmpTargetReduction(B, TTI, Src, Desc, OrigPhi, OrigLoop,
+                                          Direction);
 
   return createSimpleTargetReduction(B, TTI, Src, RK);
 }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -488,6 +488,13 @@
       this->MinProfitableTripCount = VecWidth;
     else
       this->MinProfitableTripCount = MinProfitableTripCount;
+
+    auto Bounds = OrigLoop->getBounds(*PSE.getSE());
+    if (!Bounds ||
+        Bounds->getDirection() == Loop::LoopBounds::Direction::Unknown)
+      Direction = Loop::LoopBounds::Direction::Unknown;
+    else
+      Direction = Bounds->getDirection();
   }
 
   virtual ~InnerLoopVectorizer() = default;
@@ -678,6 +685,9 @@
   /// The original loop.
   Loop *OrigLoop;
 
+  /// The direction of iteration of the original loop.
+  Loop::LoopBounds::Direction Direction;
+
   /// A wrapper around ScalarEvolution used to add runtime SCEV checks. Applies
   /// dynamic knowledge to simplify SCEV expressions and converts them to a
   /// more usable form.
@@ -3989,7 +3999,8 @@
   // target reduction in the loop using a Reduction recipe.
   if (VF.isVector() && !PhiR->isInLoop()) {
     ReducedPartRdx =
-        createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi);
+        createTargetReduction(Builder, TTI, RdxDesc, ReducedPartRdx, OrigPhi,
+                              OrigLoop, State.ILV->Direction);
 
     // If the reduction can be performed in a smaller type, we need to extend
     // the reduction to the wider type before we branch to the original loop.
     if (PhiTy != RdxDesc.getRecurrenceType())
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -912,9 +912,10 @@
 
 @table = constant [13 x i16] [i16 10, i16 35, i16 69, i16 147, i16 280, i16 472, i16 682, i16 1013, i16 1559, i16 2544, i16 4553, i16 6494, i16 10000], align 1
 
-; CHECK-LABEL: @non_reduction_index(
-; CHECK-NOT: <4 x i16>
-define i16 @non_reduction_index(i16 noundef %val) {
+; CHECK-LABEL: @reduction_index(
+; CHECK: %[[V1:.*]] = insertelement <4 x i16> poison, i16 %[[ARG:.*]], i64 0
+; CHECK: shufflevector <4 x i16> %[[V1]], <4 x i16> poison, <4 x i32> zeroinitializer
+define i16 @reduction_index(i16 noundef %val) {
 entry:
   br label %for.body
 
@@ -936,9 +937,10 @@
 
 @tablef = constant [13 x half] [half 10.0, half 35.0, half 69.0, half 147.0, half 280.0, half 472.0, half 682.0, half 1013.0, half 1559.0, half 2544.0, half 4556.0, half 6496.0, half 10000.0], align 1
 
-; CHECK-LABEL: @non_reduction_index_half(
-; CHECK-NOT: <4 x half>
-define i16 @non_reduction_index_half(half noundef %val) {
+; CHECK-LABEL: @reduction_index_half(
+; CHECK: %[[V1:.*]] = insertelement <4 x half> poison, half %[[ARG:.*]], i64 0
+; CHECK: shufflevector <4 x half> %[[V1]], <4 x half> poison, <4 x i32> zeroinitializer
+define i16 @reduction_index_half(half noundef %val) {
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/LoopVectorize/select-cmp.ll b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
--- a/llvm/test/Transforms/LoopVectorize/select-cmp.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-cmp.ll
@@ -244,6 +244,171 @@
   ret i32 %3
 }
 
+define i32 @test_select_variant_i32_from_fcmp(ptr %a) {
+; CHECK-LABEL: @test_select_variant_i32_from_fcmp(
+; CHECK-VF4IC1-NEXT: entry:
+; CHECK-VF4IC1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
+; CHECK-VF4IC1: for.cond1.preheader:
+; CHECK-VF4IC1-NEXT: [[NL_015:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
+; CHECK-VF4IC1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4IC1: vector.ph:
+; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4IC1: vector.body:
+; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x double> [[WIDE_LOAD]], <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
+; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
+; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-VF4IC1: middle.block:
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
+; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_SELECT_MASK]])
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 -1
+; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 32000, 32000
+; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
+; CHECK-VF4IC1: scalar.ph:
+; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32000, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, [[FOR_COND1_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC1-NEXT: br label [[FOR_BODY4:%.*]]
+; CHECK-VF4IC1: for.cond.cleanup:
+; CHECK-VF4IC1-NEXT: [[J_2_LCSSA_LCSSA:%.*]] = phi i32 [ [[J_2_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-VF4IC1-NEXT: ret i32 [[J_2_LCSSA_LCSSA]]
+; CHECK-VF4IC1: for.cond.cleanup3:
+; CHECK-VF4IC1-NEXT: [[J_2_LCSSA]] = phi i32 [ [[J_2:%.*]], [[FOR_BODY4]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC1-NEXT: [[INC7]] = add nuw nsw i32 [[NL_015]], 1
+; CHECK-VF4IC1-NEXT: [[EXITCOND16_NOT:%.*]] = icmp eq i32 [[INC7]], 200000
+; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND16_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
+; CHECK-VF4IC1: for.body4:
+; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4]] ]
+; CHECK-VF4IC1-NEXT: [[J_113:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[J_2]], [[FOR_BODY4]] ]
+; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-VF4IC1-NEXT: [[CMP5:%.*]] = fcmp ogt double [[TMP8]], 3.000000e+00
+; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-VF4IC1-NEXT: [[J_2]] = select i1 [[CMP5]], i32 [[TMP9]], i32 [[J_113]]
+; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32000
+; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP3:![0-9]+]]
+;
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %nl.015 = phi i32 [ 0, %entry ], [ %inc7, %for.cond.cleanup3 ]
+  br label %for.body4
+
+for.cond.cleanup:
+  ret i32 %j.2
+
+for.cond.cleanup3:
+  %inc7 = add nuw nsw i32 %nl.015, 1
+  %exitcond16.not = icmp eq i32 %inc7, 200000
+  br i1 %exitcond16.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.body4:
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %j.113 = phi i32 [ -1, %for.cond1.preheader ], [ %j.2, %for.body4 ]
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %cmp5 = fcmp ogt double %0, 3.000000e+00
+  %1 = trunc i64 %indvars.iv to i32
+  %j.2 = select i1 %cmp5, i32 %1, i32 %j.113
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 32000
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+}
+
+define i32 @test_select_variant_i32_from_fcmp_reverse(ptr %a) {
+; CHECK-LABEL: @test_select_variant_i32_from_fcmp_reverse(
+; CHECK-VF4IC1-NEXT: entry:
+; CHECK-VF4IC1-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
+; CHECK-VF4IC1: for.cond1.preheader:
+; CHECK-VF4IC1-NEXT: [[NL_014:%.*]] = phi i32 [ 199999, [[ENTRY:%.*]] ], [ [[DEC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
+; CHECK-VF4IC1-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-VF4IC1: vector.ph:
+; CHECK-VF4IC1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-VF4IC1: vector.body:
+; CHECK-VF4IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-VF4IC1-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP0]]
+; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 8
+; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = fcmp ogt <4 x double> [[WIDE_LOAD]], <double 3.000000e+00, double 3.000000e+00, double 3.000000e+00, double 3.000000e+00>
+; CHECK-VF4IC1-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]]
+; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32000
+; CHECK-VF4IC1-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-VF4IC1: middle.block:
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i32> [[TMP4]], <4 x i32> zeroinitializer
+; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_SELECT_MASK]])
+; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i32 [[TMP7]], i32 -1
+; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 32000, 32000
+; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]]
+; CHECK-VF4IC1: scalar.ph:
+; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 32000, [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
+; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ -1, [[FOR_COND1_PREHEADER]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC1-NEXT: br label [[FOR_BODY4:%.*]]
+; CHECK-VF4IC1: for.cond.cleanup:
+; CHECK-VF4IC1-NEXT: [[J_2_LCSSA_LCSSA:%.*]] = phi i32 [ [[J_2_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-VF4IC1-NEXT: ret i32 [[J_2_LCSSA_LCSSA]]
+; CHECK-VF4IC1: for.cond.cleanup3:
+; CHECK-VF4IC1-NEXT: [[J_2_LCSSA]] = phi i32 [ [[J_2:%.*]], [[FOR_BODY4]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-VF4IC1-NEXT: [[DEC]] = add nsw i32 [[NL_014]], -1
+; CHECK-VF4IC1-NEXT: [[CMP_NOT:%.*]] = icmp eq i32 [[NL_014]], 0
+; CHECK-VF4IC1-NEXT: br i1 [[CMP_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
+; CHECK-VF4IC1: for.body4:
+; CHECK-VF4IC1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4]] ]
+; CHECK-VF4IC1-NEXT: [[J_112:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[J_2]], [[FOR_BODY4]] ]
+; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 8
+; CHECK-VF4IC1-NEXT: [[CMP5:%.*]] = fcmp ogt double [[TMP8]], 3.000000e+00
+; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-VF4IC1-NEXT: [[J_2]] = select i1 [[CMP5]], i32 [[TMP9]], i32 [[J_112]]
+; CHECK-VF4IC1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 32000
+; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]], !llvm.loop [[LOOP5:![0-9]+]]
+;
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %nl.014 = phi i32 [ 199999, %entry ], [ %dec, %for.cond.cleanup3 ]
+  br label %for.body4
+
+for.cond.cleanup:
+  ret i32 %j.2
+
+for.cond.cleanup3:
+  %dec = add nsw i32 %nl.014, -1
+  %cmp.not = icmp eq i32 %nl.014, 0
+  br i1 %cmp.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.body4:
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %j.112 = phi i32 [ -1, %for.cond1.preheader ], [ %j.2, %for.body4 ]
+  %arrayidx = getelementptr inbounds double, ptr %a, i64 %indvars.iv
+  %0 = load double, ptr %arrayidx, align 8
+  %cmp5 = fcmp ogt double %0, 3.000000e+00
+  %1 = trunc i64 %indvars.iv to i32
+  %j.2 = select i1 %cmp5, i32 %1, i32 %j.112
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 32000
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+}
 
 ; Negative tests
 
@@ -296,10 +461,9 @@
   ret i32 %6
 }
 
-
-; We don't support selecting loop-variant values.
-define i32 @select_variant_i32_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) {
-; CHECK-LABEL: @select_variant_i32_from_icmp
+; We only support selecting loop-invariant values or AddRecs with a constant or loop-invariant step.
+define i32 @select_variant_load_from_icmp(ptr nocapture readonly %v1, ptr nocapture readonly %v2, i64 %n) {
+; CHECK-LABEL: @select_variant_load_from_icmp
 ; CHECK-NOT: vector.body
 entry:
   br label %for.body
@@ -321,7 +485,6 @@
   ret i32 %7
 }
 
-
 ; We only support selects where the input comes from the same PHI as the
 ; reduction PHI. In the example below, the select uses the induction
 ; variable input and the icmp uses the reduction PHI.
diff --git a/llvm/test/Transforms/LoopVectorize/select-min-index.ll b/llvm/test/Transforms/LoopVectorize/select-min-index.ll
--- a/llvm/test/Transforms/LoopVectorize/select-min-index.ll
+++ b/llvm/test/Transforms/LoopVectorize/select-min-index.ll
@@ -1,6 +1,4 @@
 ; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S %s | FileCheck %s
-; RUN: opt -passes=loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s
 
 ; Test cases for selecting the index with the minimum value.
@@ -81,9 +79,59 @@
   ret i64 %res
 }
 
-define i64 @test_not_vectorize_select_no_min_reduction(ptr %src) {
-; CHECK-LABEL: @test_not_vectorize_select_no_min_reduction(
-; CHECK-NOT: vector.body:
+define i64 @test_vectorize_select_no_min_reduction(ptr %src) {
+; CHECK-LABEL: @test_vectorize_select_no_min_reduction(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3]] = add <4 x i64> [[WIDE_LOAD]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i64> [[TMP4]], [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i64> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]])
+; CHECK-NEXT: [[RDX_SELECT_MASK:%.*]] = select <4 x i1> [[RDX_SELECT_CMP]], <4 x i64> [[TMP6]], <4 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[RDX_SELECT_MASK]])
+; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[TMP9]], i64 0
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 0, 0
+; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP3]], i32 3
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MIN_IDX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[MIN_VAL_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[SCALAR_RECUR]], [[L]]
+; CHECK-NEXT: [[MIN_VAL_NEXT]] = add i64 [[L]], 1
+; CHECK-NEXT: [[FOO:%.*]] = call i64 @llvm.umin.i64(i64 [[SCALAR_RECUR]], i64 [[L]])
+; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: exit:
+; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i64 [[RES]]
 ;
 entry:
   br label %loop
@@ -107,7 +155,6 @@
   ret i64 %res
 }
 
-
 define i64 @test_not_vectorize_cmp_value(i64 %x) {
 ; CHECK-LABEL: @test_not_vectorize_cmp_value(
 ; CHECK-NOT: vector.body: