diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -406,9 +406,9 @@
   /// according to the assumptions that we've made during the analysis.
   /// The method might also version the pointer stride according to \p Strides,
   /// and add new predicates to \p PSE.
-  void insert(Loop *Lp, Value *Ptr, Type *AccessTy, bool WritePtr,
-              unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides,
-              PredicatedScalarEvolution &PSE);
+  void insert(Loop *Lp, Value *Ptr, Type *AccessTy, const SCEV *PtrExpr,
+              bool WritePtr, unsigned DepSetId, unsigned ASId,
+              const ValueToValueMap &Strides, PredicatedScalarEvolution &PSE);
 
   /// No run-time memory checking is necessary.
   bool empty() const { return Pointers.empty(); }
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -128,6 +128,11 @@
     cl::desc("Enable conflict detection in loop-access analysis"),
     cl::init(true));
 
+static cl::opt<unsigned> MaxForkedSCEVDepth(
+    "max-forked-scev-depth", cl::Hidden,
+    cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
+    cl::init(5));
+
 bool VectorizerParams::isInterleaveForced() {
   return ::VectorizationInterleave.getNumOccurrences() > 0;
 }
@@ -189,12 +194,12 @@
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, Type *AccessTy,
-                                    bool WritePtr, unsigned DepSetId,
-                                    unsigned ASId,
+                                    const SCEV *PtrExpr, bool WritePtr,
+                                    unsigned DepSetId, unsigned ASId,
                                     const ValueToValueMap &Strides,
                                     PredicatedScalarEvolution &PSE) {
   // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+  const SCEV *Sc = PtrExpr;
   ScalarEvolution *SE = PSE.getSE();
 
   const SCEV *ScStart;
@@ -370,9 +375,12 @@
   unsigned TotalComparisons = 0;
 
-  DenseMap<Value *, unsigned> PositionMap;
-  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
-    PositionMap[Pointers[Index].PointerValue] = Index;
+  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+    Iter.first->second.push_back(Index);
+  }
 
   // We need to keep track of what pointers we've already seen so we
   // don't process them twice.
@@ -403,34 +411,35 @@
       auto PointerI = PositionMap.find(MI->getPointer());
       assert(PointerI != PositionMap.end() &&
              "pointer in equivalence class not found in PositionMap");
-      unsigned Pointer = PointerI->second;
-      bool Merged = false;
-      // Mark this pointer as seen.
-      Seen.insert(Pointer);
-
-      // Go through all the existing sets and see if we can find one
-      // which can include this pointer.
-      for (RuntimeCheckingPtrGroup &Group : Groups) {
-        // Don't perform more than a certain amount of comparisons.
-        // This should limit the cost of grouping the pointers to something
-        // reasonable. If we do end up hitting this threshold, the algorithm
-        // will create separate groups for all remaining pointers.
-        if (TotalComparisons > MemoryCheckMergeThreshold)
-          break;
-
-        TotalComparisons++;
-
-        if (Group.addPointer(Pointer, *this)) {
-          Merged = true;
-          break;
+      for (unsigned Pointer : PointerI->second) {
+        bool Merged = false;
+        // Mark this pointer as seen.
+        Seen.insert(Pointer);
+
+        // Go through all the existing sets and see if we can find one
+        // which can include this pointer.
+        for (RuntimeCheckingPtrGroup &Group : Groups) {
+          // Don't perform more than a certain amount of comparisons.
+          // This should limit the cost of grouping the pointers to something
+          // reasonable. If we do end up hitting this threshold, the algorithm
+          // will create separate groups for all remaining pointers.
+          if (TotalComparisons > MemoryCheckMergeThreshold)
+            break;
+
+          TotalComparisons++;
+
+          if (Group.addPointer(Pointer, *this)) {
+            Merged = true;
+            break;
+          }
         }
-      }
 
-      if (!Merged)
-        // We couldn't add this pointer to any existing set or the threshold
-        // for the number of comparisons has been reached. Create a new group
-        // to hold the current pointer.
-        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+        if (!Merged)
+          // We couldn't add this pointer to any existing set or the threshold
+          // for the number of comparisons has been reached. Create a new group
+          // to hold the current pointer.
+          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+      }
     }
 
     // We've computed the grouped checks for this partition.
@@ -629,10 +638,9 @@
 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
-                                const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, bool Assume) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+                                const SCEV *PtrScev, Loop *L, bool Assume) {
 
   // The bounds for loop-invariant pointer is trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
@@ -687,6 +695,176 @@
 }
 
+// Walk back through the IR for a pointer, looking for a select like the
+// following:
+//
+//  %offset = select i1 %cmp, i64 %a, i64 %b
+//  %addr = getelementptr double, double* %base, i64 %offset
+//  %ld = load double, double* %addr, align 8
+//
+// We won't be able to form a single SCEVAddRecExpr from this since the
+// address for each loop iteration depends on %cmp. We could potentially
+// produce multiple valid SCEVAddRecExprs, though, and check all of them for
+// memory safety/aliasing if needed.
+//
+// If we encounter some IR we don't yet handle, or something obviously fine
+// like a constant, then we just add the SCEV for that term to the list passed
+// in by the caller. If we have a node that may potentially yield a valid
+// SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
+// ourselves before adding them to the list.
+static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr,
+                            SmallVectorImpl<const SCEV *> &ScevList,
+                            unsigned Depth) {
+  // If our Value is loop invariant or a SCEVAddRecExpr, we already have
+  // a usable value. If it's not an instruction or we've exceeded our limit
+  // on recursion, just return whatever we have regardless of whether it can
+  // be used for a forked pointer or not.
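+  // (For instance, a loop-invariant base pointer, a constant offset, or an
+  // AddRec like {%base,+,8}<%loop> can be handed straight back to the
+  // caller.)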
+  const SCEV *Scev = SE->getSCEV(Ptr);
+  if (SE->isLoopInvariant(Scev, L) || isa<SCEVAddRecExpr>(Scev) ||
+      !isa<Instruction>(Ptr) || Depth == 0) {
+    ScevList.push_back(Scev);
+    return;
+  }
+
+  Depth--;
+
+  auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *LHS, const SCEV *RHS) {
+    switch (Opcode) {
+    case Instruction::Add:
+      return SE->getAddExpr(LHS, RHS);
+    case Instruction::Sub:
+      return SE->getMinusSCEV(LHS, RHS);
+    case Instruction::Mul:
+      return SE->getMulExpr(LHS, RHS);
+    default:
+      llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
+    }
+  };
+
+  Instruction *I = cast<Instruction>(Ptr);
+  unsigned Opcode = I->getOpcode();
+  switch (Opcode) {
+  case Instruction::BitCast:
+    findForkedSCEVs(SE, L, I->getOperand(0), ScevList, Depth);
+    break;
+  case Instruction::SExt:
+  case Instruction::ZExt: {
+    SmallVector<const SCEV *, 2> ExtScevs;
+    findForkedSCEVs(SE, L, I->getOperand(0), ExtScevs, Depth);
+    for (const SCEV *Scev : ExtScevs)
+      if (Opcode == Instruction::SExt)
+        ScevList.push_back(SE->getSignExtendExpr(Scev, I->getType()));
+      else
+        ScevList.push_back(SE->getZeroExtendExpr(Scev, I->getType()));
+    break;
+  }
+  case Instruction::GetElementPtr: {
+    GetElementPtrInst *GEP = cast<GetElementPtrInst>(I);
+    Type *SourceTy = GEP->getSourceElementType();
+    // We only handle base + single offset GEPs here for now.
+    // Not dealing with preexisting gathers yet, so no vectors.
+    if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
+      ScevList.push_back(Scev);
+      break;
+    }
+    SmallVector<const SCEV *, 2> BaseScevs;
+    SmallVector<const SCEV *, 2> OffsetScevs;
+    findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
+
+    // Make sure we get the correct pointer type to extend to, including the
+    // address space.
+    const SCEV *BaseExpr = SE->getSCEV(GEP->getPointerOperand());
+    Type *IntPtrTy = SE->getEffectiveSCEVType(BaseExpr->getType());
+    SCEV::NoWrapFlags Wrap =
+        GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+    // Find the size of the type being pointed to. We only have a single
+    // index term (guarded above) so we don't need to index into arrays or
+    // structures, just get the size of the scalar value.
+    const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
+
+    if (OffsetScevs.size() == 2 && BaseScevs.size() == 1) {
+      const SCEV *Off1 = SE->getTruncateOrSignExtend(OffsetScevs[0], IntPtrTy);
+      const SCEV *Off2 = SE->getTruncateOrSignExtend(OffsetScevs[1], IntPtrTy);
+      const SCEV *Mul1 = SE->getMulExpr(Size, Off1, Wrap);
+      const SCEV *Mul2 = SE->getMulExpr(Size, Off2, Wrap);
+      const SCEV *Add1 = SE->getAddExpr(BaseScevs[0], Mul1, Wrap);
+      const SCEV *Add2 = SE->getAddExpr(BaseScevs[0], Mul2, Wrap);
+      ScevList.push_back(Add1);
+      ScevList.push_back(Add2);
+    } else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1) {
+      const SCEV *Off = SE->getTruncateOrSignExtend(OffsetScevs[0], IntPtrTy);
+      const SCEV *Mul = SE->getMulExpr(Size, Off, Wrap);
+      const SCEV *Add1 = SE->getAddExpr(BaseScevs[0], Mul, Wrap);
+      const SCEV *Add2 = SE->getAddExpr(BaseScevs[1], Mul, Wrap);
+      ScevList.push_back(Add1);
+      ScevList.push_back(Add2);
+    } else
+      ScevList.push_back(Scev);
+    break;
+  }
+  case Instruction::Select: {
+    SmallVector<const SCEV *, 2> ChildScevs;
+    // A select means we've found a forked pointer, but we currently only
+    // support a single select per pointer, so if there's another behind this
+    // one then we just bail out and return the generic SCEV.
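+    // For example, an offset built from two selects that both vary inside
+    // the loop (hypothetical IR, not one of the patterns tested below):
+    //   %off.inner = select i1 %c1, i64 %iv.a, i64 %iv.b
+    //   %off = select i1 %c2, i64 %off.inner, i64 %iv.c
+    // produces more than two SCEVs for %off, so we fall back to the plain
+    // SCEV for the whole pointer.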
+    findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
+    if (ChildScevs.size() == 2) {
+      ScevList.push_back(ChildScevs[0]);
+      ScevList.push_back(ChildScevs[1]);
+    } else
+      ScevList.push_back(Scev);
+    break;
+  }
+  // If adding another binop to this list, update GetBinOpExpr above.
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul: {
+    SmallVector<const SCEV *, 2> LScevs;
+    SmallVector<const SCEV *, 2> RScevs;
+    findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
+    findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
+    if (LScevs.size() == 2 && RScevs.size() == 1) {
+      const SCEV *Op1 = GetBinOpExpr(Opcode, LScevs[0], RScevs[0]);
+      const SCEV *Op2 = GetBinOpExpr(Opcode, LScevs[1], RScevs[0]);
+      ScevList.push_back(Op1);
+      ScevList.push_back(Op2);
+    } else if (LScevs.size() == 1 && RScevs.size() == 2) {
+      const SCEV *Op1 = GetBinOpExpr(Opcode, LScevs[0], RScevs[0]);
+      const SCEV *Op2 = GetBinOpExpr(Opcode, LScevs[0], RScevs[1]);
+      ScevList.push_back(Op1);
+      ScevList.push_back(Op2);
+    } else
+      ScevList.push_back(Scev);
+    break;
+  }
+  default:
+    // Just return the current SCEV if we haven't handled the instruction yet.
+    LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
+    ScevList.push_back(Scev);
+    break;
+  }
+
+  return;
+}
+
+static SmallVector<const SCEV *>
+findForkedPointer(PredicatedScalarEvolution &PSE,
+                  const ValueToValueMap &StridesMap, Value *Ptr,
+                  const Loop *L) {
+  ScalarEvolution *SE = PSE.getSE();
+  assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
+  SmallVector<const SCEV *> Scevs;
+  findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
+
+  // For now, we will only accept a forked pointer with two options.
+  if (Scevs.size() == 2)
+    return Scevs;
+
+  return {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)};
+}
+
 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                           MemAccessInfo Access, Type *AccessTy,
                                           const ValueToValueMap &StridesMap,
@@ -696,34 +874,54 @@
                                           bool Assume) {
   Value *Ptr = Access.getPointer();
 
-  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
-    return false;
+  SmallVector<const SCEV *> TranslatedPtrs =
+      findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
 
-  // When we run after a failing dependency check we have to make sure
-  // we don't have wrapping pointers.
-  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
-    auto *Expr = PSE.getSCEV(Ptr);
-    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+  for (const SCEV *PtrExpr : TranslatedPtrs) {
+    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
       return false;
-    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
-  }
+
+    // When we run after a failing dependency check we have to make sure
+    // we don't have wrapping pointers.
+    if (ShouldCheckWrap) {
+      // If we forked a pointer via a select, don't check for wrapping
+      // behaviour.
+      // TODO: Implement this; it requires checking the SCEVs individually
+      // instead of the overall ptr, since that just resolves to a SCEVUnknown.
+      if (TranslatedPtrs.size() > 1)
+        return false;
+
+      if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) {
+        auto *Expr = PSE.getSCEV(Ptr);
+        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+          return false;
+        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+      }
+    }
+    // For the single (non-forked) pointer case, recompute the stride-replaced
+    // SCEV here: the wrap check above may have added new predicates to PSE
+    // (e.g. when versioning a symbolic stride), and the runtime check should
+    // use them.
+    if (TranslatedPtrs.size() == 1)
+      TranslatedPtrs[0] = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
+  }
 
-  // The id of the dependence set.
- unsigned DepId; + for (const SCEV *PtrExpr : TranslatedPtrs) { + // The id of the dependence set. + unsigned DepId; - if (isDependencyCheckNeeded()) { - Value *Leader = DepCands.getLeaderValue(Access).getPointer(); - unsigned &LeaderId = DepSetId[Leader]; - if (!LeaderId) - LeaderId = RunningDepId++; - DepId = LeaderId; - } else - // Each access has its own dependence set. - DepId = RunningDepId++; + if (isDependencyCheckNeeded()) { + Value *Leader = DepCands.getLeaderValue(Access).getPointer(); + unsigned &LeaderId = DepSetId[Leader]; + if (!LeaderId) + LeaderId = RunningDepId++; + DepId = LeaderId; + } else + // Each access has its own dependence set. + DepId = RunningDepId++; - bool IsWrite = Access.getInt(); - RtCheck.insert(TheLoop, Ptr, AccessTy, IsWrite, DepId, ASId, StridesMap, PSE); - LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + bool IsWrite = Access.getInt(); + RtCheck.insert(TheLoop, Ptr, AccessTy, PtrExpr, IsWrite, DepId, ASId, + StridesMap, PSE); + LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); + } return true; } diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll @@ -3,16 +3,43 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" ; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset': -; CHECK-NEXT: for.body: -; CHECK-NEXT: Report: cannot identify array bounds -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv +; CHECK-NEXT: Check 2: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Base2 High: (400 + %Base2)) +; CHECK-NEXT: Member: {%Base2,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Base1 High: (400 + %Base1)) +; CHECK-NEXT: Member: {%Base1,+,4}<%for.body> ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
+; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: Expressions re-written: ;;;; Derived from the following C code ;; void forked_ptrs_different_base_same_offset(float *A, float *B, float *C, int *D) { @@ -48,16 +75,37 @@ } ; CHECK-LABEL: function 'forked_ptrs_same_base_different_offset': -; CHECK-NEXT: for.body: -; CHECK-NEXT: Report: cannot identify array bounds -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213 +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Base High: (404 + %Base)) +; CHECK-NEXT: Member: {(4 + %Base),+,4}<%for.body> +; CHECK-NEXT: Member: {%Base,+,4}<%for.body> ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: Expressions re-written: ;;;; Derived from the following C code ;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) { @@ -97,19 +145,38 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } -;;;; Cases that can be handled by a forked pointer but are not currently allowed. 
- ; CHECK-LABEL: function 'forked_ptrs_uniform_and_strided_forks': -; CHECK-NEXT: for.body: -; CHECK-NEXT: Report: cannot identify array bounds -; CHECK-NEXT: Dependences: -; CHECK-NEXT: Run-time memory checks: -; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: for.body: +; CHECK-NEXT: Memory dependences are safe with run-time checks +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Check 0: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv +; CHECK-NEXT: Check 1: +; CHECK-NEXT: Comparing group +; CHECK-NEXT: %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv +; CHECK-NEXT: Against group +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213 +; CHECK-NEXT: %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213 +; CHECK-NEXT: Grouped accesses: +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Dest High: (400 + %Dest)) +; CHECK-NEXT: Member: {%Dest,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Preds High: (400 + %Preds)) +; CHECK-NEXT: Member: {%Preds,+,4}<%for.body> +; CHECK-NEXT: Group +; CHECK-NEXT: (Low: %Base High: (1192 + %Base)) +; CHECK-NEXT: Member: (16 + %Base) +; CHECK-NEXT: Member: {%Base,+,12}<%for.body> ; CHECK-EMPTY: -; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. -; CHECK-NEXT: SCEV assumptions: +; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. +; CHECK-NEXT: SCEV assumptions: ; CHECK-EMPTY: -; CHECK-NEXT: Expressions re-written: +; CHECK-NEXT: Expressions re-written: ;;;; Derived from forked_ptrs_same_base_different_offset with a manually ;;;; added uniform offset and a mul to provide a stride @@ -141,6 +208,8 @@ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +;;;; Cases that can be handled by a forked pointer but are not currently allowed. 
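+;;;; The function below exercises one such case; for it the analysis still
+;;;; reports "cannot identify array bounds" in the checks that follow.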
+ ; CHECK-LABEL: function 'forked_ptrs_gather_and_contiguous_forks': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: cannot identify array bounds diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll --- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll @@ -17,22 +17,84 @@ define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) { ; CHECK-LABEL: @forked_ptrs_different_base_same_offset( ; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 100 +; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[PREDS:%.*]], i64 100 +; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr float, float* [[BASE2:%.*]], i64 100 +; CHECK-NEXT: [[SCEVGEP10:%.*]] = getelementptr float, float* [[BASE1:%.*]], i64 100 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SCEVGEP4]] to float* +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[DEST]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32* +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[PREDS]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: [[BOUND012:%.*]] = icmp ugt float* [[SCEVGEP7]], [[DEST]] +; CHECK-NEXT: [[BOUND113:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE2]] +; CHECK-NEXT: [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]] +; CHECK-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT14]] +; CHECK-NEXT: [[BOUND015:%.*]] = icmp ugt float* [[SCEVGEP10]], [[DEST]] +; CHECK-NEXT: [[BOUND116:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE1]] +; CHECK-NEXT: [[FOUND_CONFLICT17:%.*]] = and i1 [[BOUND015]], [[BOUND116]] +; CHECK-NEXT: [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT17]] +; CHECK-NEXT: br i1 [[CONFLICT_RDX18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[BASE2]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x float*> poison, float* [[BASE1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT19]], <4 x float*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4, !alias.scope !0 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x float*> [[BROADCAST_SPLAT]], <4 x float*> [[BROADCAST_SPLAT20]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float*> [[TMP8]], i64 0 +; CHECK-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds float, float* [[TMP9]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float*> [[TMP8]], i64 1 +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float*> [[TMP8]], i64 2 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float*> [[TMP8]], i64 3 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[TMP10]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[TMP12]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[TMP14]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP20:%.*]] = load float, float* [[TMP16]], align 4, !alias.scope !3 +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP18]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 2 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i64 3 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP25]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP24]], <4 x float>* [[TMP26]], align 4, !alias.scope !5, !noalias !7 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0 -; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2:%.*]], float* [[BASE1:%.*]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP28]], 0 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2]], float* [[BASE1]] ; CHECK-NEXT: [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store float [[DOTSINK]], float* [[TMP29]], align 4 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = 
add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ;
 entry:
   br label %for.body
@@ -70,26 +132,83 @@
 define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_same_base_different_offset(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[PREDS:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[BASE:%.*]], i64 101
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SCEVGEP4]] to float*
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[DEST]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32*
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[PREDS]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND09:%.*]] = icmp ugt float* [[SCEVGEP7]], [[DEST]]
+; CHECK-NEXT:    [[BOUND110:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE]]
+; CHECK-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND13:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !12
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i32> [[VEC_IND13]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_IND15]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext <4 x i32> [[TMP6]] to <4 x i64>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP9]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP11]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP13]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP15]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i64 0
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP17]], i64 1
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP18]], i64 2
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i64 3
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP23]], <4 x float>* [[TMP25]], align 4, !alias.scope !17, !noalias !19
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT14]] = add <4 x i32> [[VEC_IND13]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL12:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL12]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP27]], 0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_014]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP28]]
 ; CHECK-NEXT:    [[IDXPROM213:%.*]] = zext i32 [[OFFSET_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE:%.*]], i64 [[IDXPROM213]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[IDXPROM213]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP29]], float* [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ;
 entry:
   br label %for.body