diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -130,6 +130,11 @@
     cl::desc("Enable conflict detection in loop-access analysis"),
     cl::init(true));
 
+static cl::opt<unsigned> MaxForkedSCEVDepth(
+    "max-forked-scev-depth", cl::Hidden,
+    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
+    cl::init(5));
+
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
 }
@@ -772,6 +777,126 @@
   }
 }
 
+// Walk back through the IR for a pointer, looking for a select like the
+// following:
+//
+//  %offset = select i1 %cmp, i64 %a, i64 %b
+//  %addr = getelementptr double, double* %base, i64 %offset
+//  %ld = load double, double* %addr, align 8
+//
+// We won't be able to form a single SCEVAddRecExpr from this since the
+// address for each loop iteration depends on %cmp. We could potentially
+// produce multiple valid SCEVAddRecExprs, though, and check all of them for
+// memory safety/aliasing if needed.
+//
+// If we encounter some IR we don't yet handle, or something obviously fine
+// like a constant, then we just add the SCEV for that term to the list passed
+// in by the caller. If we have a node that may potentially yield a valid
+// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
+// ourselves before adding to the list.
+static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
+                            SmallVectorImpl<const SCEV *> &ScevList,
+                            unsigned Depth) {
+  // If our Value is loop invariant or a SCEVAddRecExpr, we already have
+  // a usable value. If it's not an instruction or we've exceeded our limit
+  // on recursion, just return whatever we have regardless of whether it can
+  // be used for a forked pointer or not.
+  const SCEV *Scev = SE->getSCEV(Ptr);
+  if (SE->isLoopInvariant(Scev, L) || isa<SCEVAddRecExpr>(Scev) ||
+      !isa<Instruction>(Ptr) || Depth == 0) {
+    ScevList.push_back(Scev);
+    return;
+  }
+
+  Depth--;
+
+  Instruction *I = cast<Instruction>(Ptr);
+  unsigned Opcode = I->getOpcode();
+  switch (Opcode) {
+  case Instruction::GetElementPtr: {
+    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+    Type *SourceTy = GEP->getSourceElementType();
+    // We only handle base + single offset GEPs here for now.
+    // Not dealing with preexisting gathers yet, so no vectors.
+    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
+      ScevList.push_back(Scev);
+      break;
+    }
+    SmallVector<const SCEV *, 2> BaseScevs;
+    SmallVector<const SCEV *, 2> OffsetScevs;
+    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
+
+    // Check that we only have a single fork, on either the base or the
+    // offset. Copy the SCEV across for the one without a fork in order to
+    // generate the full SCEV for both sides of the GEP.
+    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
+      BaseScevs.push_back(BaseScevs[0]);
+    else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
+      OffsetScevs.push_back(OffsetScevs[0]);
+    else {
+      ScevList.push_back(Scev);
+      break;
+    }
+
+    // Find the pointer type we need to extend to.
+    Type *IntPtrTy = SE->getEffectiveSCEVType(
+        SE->getSCEV(GEP->getPointerOperand())->getType());
+
+    // Find the size of the type being pointed to. We only have a single
+    // index term (guarded above) so we don't need to index into arrays or
+    // structures, just get the size of the scalar value.
+    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
+
+    // Scale up the offsets by the size of the type, then add to the bases.
+    const SCEV *Scaled1 = SE->getMulExpr(
+        Size, SE->getTruncateOrSignExtend(OffsetScevs[0], IntPtrTy));
+    const SCEV *Scaled2 = SE->getMulExpr(
+        Size, SE->getTruncateOrSignExtend(OffsetScevs[1], IntPtrTy));
+    ScevList.push_back(SE->getAddExpr(BaseScevs[0], Scaled1));
+    ScevList.push_back(SE->getAddExpr(BaseScevs[1], Scaled2));
+    break;
+  }
+  case Instruction::Select: {
+    SmallVector<const SCEV *, 2> ChildScevs;
+    // A select means we've found a forked pointer, but we currently only
+    // support a single select per pointer, so if there's another behind
+    // this one we just bail out and return the generic SCEV.
+    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
+    if (ChildScevs.size() == 2) {
+      ScevList.push_back(ChildScevs[0]);
+      ScevList.push_back(ChildScevs[1]);
+    } else
+      ScevList.push_back(Scev);
+    break;
+  }
+  default:
+    // Just return the current SCEV if we haven't handled the instruction yet.
+    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
+    ScevList.push_back(Scev);
+    break;
+  }
+
+  return;
+}
+
+static SmallVector<const SCEV *>
+findForkedPointer(PredicatedScalarEvolution &PSE,
+                  const ValueToValueMap &StridesMap, Value *Ptr,
+                  const Loop *L) {
+  ScalarEvolution *SE = PSE.getSE();
+  assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
+  SmallVector<const SCEV *> Scevs;
+  findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
+
+  // For now, we will only accept a forked pointer with two options.
+  if (Scevs.size() == 2)
+    return Scevs;
+
+  return {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)};
+}
+
 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                           MemAccessInfo Access, Type *AccessTy,
                                           const ValueToValueMap &StridesMap,
@@ -781,13 +906,8 @@
                                           bool Assume) {
   Value *Ptr = Access.getPointer();
 
-  ScalarEvolution &SE = *PSE.getSE();
-  SmallVector<const SCEV *> TranslatedPtrs;
-  if (auto *SI = dyn_cast<SelectInst>(Ptr))
-    TranslatedPtrs = {SE.getSCEV(SI->getOperand(1)),
-                      SE.getSCEV(SI->getOperand(2))};
-  else
-    TranslatedPtrs = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)};
+  SmallVector<const SCEV *> TranslatedPtrs =
+      findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
 
   for (const SCEV *PtrExpr : TranslatedPtrs) {
     if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
--- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -1,4 +1,5 @@
-; RUN: opt -disable-output -passes='print-access-info' %s 2>&1 | FileCheck %s
+; RUN: opt -disable-output -opaque-pointers -passes='print-access-info' %s 2>&1 | FileCheck %s
+; RUN: opt -disable-output -opaque-pointers -passes='print-access-info' -max-forked-scev-depth=2 %s 2>&1 | FileCheck -check-prefix=RECURSE %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
@@ -9,16 +10,16 @@
 ; CHECK-NEXT: Run-time memory checks:
 ; CHECK-NEXT: Check 0:
 ; CHECK-NEXT: Comparing group ([[G1:.+]]):
-; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
-; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, ptr %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, ptr %Dest, i64 %iv
 ; CHECK-NEXT: Against group ([[G2:.+]]):
-; CHECK-NEXT: %select = select i1 %cmp, float* %gep.1, float* %gep.2
+; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.1, ptr %gep.2
 ; CHECK-NEXT: Check 1:
 ; CHECK-NEXT: Comparing group ([[G1]]):
-; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
-; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, ptr %Dest, i64 %iv
+; CHECK-NEXT: %gep.Dest = getelementptr inbounds float, ptr %Dest, i64 %iv
 ; CHECK-NEXT: Against group ([[G3:.+]]):
-; CHECK-NEXT: %select = select i1 %cmp, float* %gep.1, float* %gep.2
+; CHECK-NEXT: %select = select i1 %cmp, ptr %gep.1, ptr %gep.2
 ; CHECK-NEXT: Grouped accesses:
 ; CHECK-NEXT: Group [[G1]]
 ; CHECK-NEXT: (Low: %Dest High: (400 + %Dest))
@@ -58,18 +59,59 @@
   ret void
 }
 
-
 ; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
-; CHECK-NEXT: for.body:
-; CHECK-NEXT: Report: cannot identify array bounds
-; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Run-time memory checks:
-; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: for.body:
+; CHECK-NEXT: Memory dependences are safe with run-time checks
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Check 0:
+; CHECK-NEXT: Comparing group ([[G1:.+]]):
+; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
+; CHECK-NEXT: Against group ([[G2:.+]]):
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+; CHECK-NEXT: Check 1:
+; CHECK-NEXT: Comparing group ([[G1]]):
+; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
+; CHECK-NEXT: Against group ([[G3:.+]]):
+; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
+; CHECK-NEXT: Check 2:
+; CHECK-NEXT: Comparing group ([[G1]]):
+; CHECK-NEXT: %1 = getelementptr inbounds float, ptr %Dest, i64 %indvars.iv
+; CHECK-NEXT: Against group ([[G4:.+]]):
+; CHECK-NEXT: %.sink.in = getelementptr inbounds float, ptr %spec.select, i64 %indvars.iv
+; CHECK-NEXT: Grouped accesses:
+; CHECK-NEXT: Group [[G1]]:
+; CHECK-NEXT: (Low: %Dest High: (400 + %Dest))
+; CHECK-NEXT: Member: {%Dest,+,4}<%for.body>
+; CHECK-NEXT: Group [[G2]]:
+; CHECK-NEXT: (Low: %Preds High: (400 + %Preds))
+; CHECK-NEXT: Member: {%Preds,+,4}<%for.body>
+; CHECK-NEXT: Group [[G3]]:
+; CHECK-NEXT: (Low: %Base2 High: (400 + %Base2))
+; CHECK-NEXT: Member: {%Base2,+,4}<%for.body>
+; CHECK-NEXT: Group [[G4]]:
+; CHECK-NEXT: (Low: %Base1 High: (400 + %Base1))
+; CHECK-NEXT: Member: {%Base1,+,4}<%for.body>
 ; CHECK-EMPTY:
-; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT: SCEV assumptions:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
 ; CHECK-EMPTY:
-; CHECK-NEXT: Expressions re-written:
+; CHECK-NEXT: Expressions re-written:
+
+;; We have a limit on the recursion depth for finding a loop invariant or
+;; addrec term; confirm that we bail out gracefully when a lower limit is
+;; forced via -max-forked-scev-depth=2.
+; RECURSE-LABEL: Loop access info in function 'forked_ptrs_same_base_different_offset':
+; RECURSE-NEXT: for.body:
+; RECURSE-NEXT: Report: cannot identify array bounds
+; RECURSE-NEXT: Dependences:
+; RECURSE-NEXT: Run-time memory checks:
+; RECURSE-NEXT: Grouped accesses:
+; RECURSE-EMPTY:
+; RECURSE-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; RECURSE-NEXT: SCEV assumptions: +; RECURSE-EMPTY: +; RECURSE-NEXT: Expressions re-written: ;;;; Derived from the following C code ;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) { @@ -237,3 +279,150 @@ %exitcond.not = icmp eq i64 %indvars.iv.next, 100 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } + +;; We don't currently handle a fork in both the base and the offset of a +;; GEP instruction. + +; CHECK-LABEL: Loop access info in function 'forked_ptrs_two_forks_gep': +; CHECK-NEXT: for.body: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: + +define dso_local void @forked_ptrs_two_forks_gep(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) { +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1.not = icmp eq i32 %0, 0 + %spec.select = select i1 %cmp1.not, float* %Base2, float* %Base1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %offset = select i1 %cmp1.not, i64 %indvars.iv.next, i64 %indvars.iv + %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %offset + %.sink = load float, float* %.sink.in, align 4 + %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv + store float %.sink, float* %1, align 4 + %exitcond.not = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +;; We don't handle forks as children of a select + +; CHECK-LABEL: Loop access info in function 'forked_ptrs_two_select': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: + +define void @forked_ptrs_two_select(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture readonly %Base3, float* %Dest) { +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.Dest = getelementptr inbounds float, float* %Dest, i64 %iv + %l.Dest = load float, float* %gep.Dest + %cmp = fcmp une float %l.Dest, 0.0 + %cmp1 = fcmp une float %l.Dest, 1.0 + %gep.1 = getelementptr inbounds float, float* %Base1, i64 %iv + %gep.2 = getelementptr inbounds float, float* %Base2, i64 %iv + %gep.3 = getelementptr inbounds float, float* %Base3, i64 %iv + %select = select i1 %cmp, float* %gep.1, float* %gep.2 + %select1 = select i1 %cmp1, float* %select, float* %gep.3 + %sink = load float, float* %select1, align 4 + store float %sink, float* %gep.Dest, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 100 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +;; We don't yet handle geps with more than 2 operands +; CHECK-LABEL: Loop access info in function 'forked_ptrs_too_many_gep_ops': +; CHECK-NEXT: for.body: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: +; CHECK-EMPTY: +; CHECK-NEXT: Expressions re-written: + +define void @forked_ptrs_too_many_gep_ops(ptr nocapture readonly %Base1, ptr nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %cmp1.not = icmp eq i32 %0, 0 + %spec.select = select i1 %cmp1.not, ptr %Base2, ptr %Base1 + %.sink.in = getelementptr inbounds [1000 x float], ptr %spec.select, i64 0, i64 %indvars.iv + %.sink = load float, ptr %.sink.in, align 4 + %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv + store float %.sink, float* %1, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, 100 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +;; We don't currently handle vector GEPs +; CHECK-LABEL: Loop access info in function 'forked_ptrs_vector_gep': +; CHECK-NEXT: for.body: +; CHECK-NEXT: Report: cannot identify array bounds +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: +; CHECK-EMPTY: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+
+define void @forked_ptrs_vector_gep(ptr nocapture readonly %Base1, ptr nocapture readonly %Base2, ptr nocapture %Dest, ptr nocapture readonly %Preds) {
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %Preds, i64 %indvars.iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %cmp1.not = icmp eq i32 %0, 0
+  %spec.select = select i1 %cmp1.not, ptr %Base2, ptr %Base1
+  %.sink.in = getelementptr inbounds <4 x float>, ptr %spec.select, i64 %indvars.iv
+  %.sink = load <4 x float>, ptr %.sink.in, align 4
+  %1 = getelementptr inbounds <4 x float>, ptr %Dest, i64 %indvars.iv
+  store <4 x float> %.sink, ptr %1, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 4
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 100
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
--- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
@@ -17,22 +17,79 @@
 define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK: vector.memcheck:
+; CHECK-NEXT: [[DEST1:%.*]] = ptrtoint float* [[DEST:%.*]] to i64
+; CHECK-NEXT: [[PREDS2:%.*]] = ptrtoint i32* [[PREDS:%.*]] to i64
+; CHECK-NEXT: [[BASE23:%.*]] = ptrtoint float* [[BASE2:%.*]] to i64
+; CHECK-NEXT: [[BASE15:%.*]] = ptrtoint float* [[BASE1:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[DEST1]], [[PREDS2]]
+; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[DEST1]], [[BASE23]]
+; CHECK-NEXT: [[DIFF_CHECK4:%.*]] = icmp ult i64 [[TMP1]], 16
+; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[DIFF_CHECK]], [[DIFF_CHECK4]]
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[DEST1]], [[BASE15]]
+; CHECK-NEXT: [[DIFF_CHECK6:%.*]] = icmp ult i64 [[TMP2]], 16
+; CHECK-NEXT: [[CONFLICT_RDX7:%.*]] = or i1 [[CONFLICT_RDX]], [[DIFF_CHECK6]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX7]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[BASE2]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <4 x float*> poison, float* [[BASE1]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT8]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = select <4 x i1> [[TMP8]], <4 x float*> [[BROADCAST_SPLAT]], <4 x float*> [[BROADCAST_SPLAT9]]
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float*> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP10]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float*> [[TMP9]], i64 1
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float*> [[TMP9]], i64 2
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float*> [[TMP9]], i64 3
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP11]], align 4
+; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP13]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP15]], align 4
+; CHECK-NEXT: [[TMP21:%.*]] = load float, float* [[TMP17]], align 4
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> poison, float [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 1
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i64 2
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x float> [[TMP24]], float [[TMP21]], i64 3
+; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP27:%.*]] = bitcast float* [[TMP26]] to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[TMP25]], <4 x float>* [[TMP27]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT: br label [[FOR_BODY:%.*]]
 ; CHECK: for.cond.cleanup:
 ; CHECK-NEXT: ret void
 ; CHECK: for.body:
-; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2:%.*]], float* [[BASE1:%.*]]
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP29:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP29]], 0
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2]], float* [[BASE1]]
 ; CHECK-NEXT: [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT: [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP30]], align 4
 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 entry:
   br label %for.body
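
Editor's note: for readers unfamiliar with the shape of the recursion in findForkedSCEVs, the following minimal standalone C++ sketch models it on a toy expression tree. It is not LLVM code and all names are hypothetical; it only tracks the shape of the recursion (a select contributes one term per arm, a two-operand GEP tolerates a fork on exactly one side and duplicates the other, and anything unhandled or past the depth budget collapses to a single opaque term). The real patch builds SCEVs and scales the offset by the element size before adding it to the base, which this sketch omits.

// Standalone sketch of the forked-SCEV recursion; symbolic strings stand in
// for SCEV expressions. Illustrative only, not the LLVM implementation.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Expr {
  enum Kind { Leaf, Select, Gep } K;
  std::string Name;           // Leaf only: symbolic name of the term.
  std::shared_ptr<Expr> A, B; // Select: the two arms; Gep: base and offset.
};

// Appends one term (no fork found below E) or two terms (exactly one fork)
// to Out; "<opaque>" models giving up and keeping the unanalyzed SCEV.
static void findForked(const std::shared_ptr<Expr> &E,
                       std::vector<std::string> &Out, unsigned Depth) {
  if (E->K == Expr::Leaf || Depth == 0) {
    Out.push_back(E->K == Expr::Leaf ? E->Name : "<opaque>");
    return;
  }
  --Depth;
  if (E->K == Expr::Select) {
    // A select is the fork itself: one term per arm, but only if neither
    // arm forks again (i.e. we got back exactly one term from each).
    std::vector<std::string> Children;
    findForked(E->A, Children, Depth);
    findForked(E->B, Children, Depth);
    if (Children.size() == 2)
      Out.insert(Out.end(), Children.begin(), Children.end());
    else
      Out.push_back("<opaque>");
    return;
  }
  // Gep: allow a fork on at most one operand; duplicate the other side so
  // both resulting terms have a complete base + offset expression.
  std::vector<std::string> Bases, Offsets;
  findForked(E->A, Bases, Depth);
  findForked(E->B, Offsets, Depth);
  if (Bases.size() == 2 && Offsets.size() == 1)
    Offsets.push_back(Offsets[0]);
  else if (Offsets.size() == 2 && Bases.size() == 1)
    Bases.push_back(Bases[0]);
  else if (Bases.size() != 1 || Offsets.size() != 1) {
    Out.push_back("<opaque>"); // Forked on both sides: bail out.
    return;
  }
  for (size_t I = 0; I < Bases.size(); ++I)
    Out.push_back("(" + Bases[I] + " + " + Offsets[I] + ")");
}

int main() {
  // Models: %addr = getelementptr float, ptr %Base, i64 select(%cmp, %i, %j)
  auto L = [](const std::string &N) {
    return std::make_shared<Expr>(Expr{Expr::Leaf, N, nullptr, nullptr});
  };
  auto Sel = std::make_shared<Expr>(Expr{Expr::Select, "", L("%i"), L("%j")});
  auto Gep = std::make_shared<Expr>(Expr{Expr::Gep, "", L("%Base"), Sel});

  std::vector<std::string> Out;
  findForked(Gep, Out, /*Depth=*/5);
  for (const std::string &S : Out)
    std::cout << S << "\n"; // Prints "(%Base + %i)" and "(%Base + %j)".
  return 0;
}

With Depth forced to 0 (mirroring a low -max-forked-scev-depth), the same call yields a single "<opaque>" term, which corresponds to the caller falling back to the non-forked replaceSymbolicStrideSCEV path, as the RECURSE test above checks.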