diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -410,9 +410,8 @@
   /// according to the assumptions that we've made during the analysis.
   /// The method might also version the pointer stride according to \p Strides,
   /// and add new predicates to \p PSE.
-  void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId,
-              unsigned ASId, const ValueToValueMap &Strides,
-              PredicatedScalarEvolution &PSE);
+  void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, bool WritePtr,
+              unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE);
 
   /// No run-time memory checking is necessary.
   bool empty() const { return Pointers.empty(); }
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -47,6 +47,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/IR/ValueHandle.h"
@@ -66,6 +67,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 #define DEBUG_TYPE "loop-accesses"
 
@@ -189,12 +191,11 @@
 ///
 /// There is no conflict when the intervals are disjoint:
 /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
-void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
-                                    unsigned DepSetId, unsigned ASId,
-                                    const ValueToValueMap &Strides,
+void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
+                                    bool WritePtr, unsigned DepSetId,
+                                    unsigned ASId,
                                     PredicatedScalarEvolution &PSE) {
-  // Get the stride replaced scev.
-  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+  const SCEV *Sc = PtrExpr;
   ScalarEvolution *SE = PSE.getSE();
 
   const SCEV *ScStart;
@@ -371,9 +372,11 @@
 
   unsigned TotalComparisons = 0;
 
-  DenseMap<Value *, unsigned> PositionMap;
-  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
-    PositionMap[Pointers[Index].PointerValue] = Index;
+  DenseMap<Value *, SmallVector<unsigned>> PositionMap;
+  for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
+    auto Iter = PositionMap.insert({Pointers[Index].PointerValue, {}});
+    Iter.first->second.push_back(Index);
+  }
 
   // We need to keep track of what pointers we've already seen so we
   // don't process them twice.
@@ -404,34 +407,35 @@
       auto PointerI = PositionMap.find(MI->getPointer());
       assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
-      unsigned Pointer = PointerI->second;
-      bool Merged = false;
-      // Mark this pointer as seen.
-      Seen.insert(Pointer);
-
-      // Go through all the existing sets and see if we can find one
-      // which can include this pointer.
-      for (RuntimeCheckingPtrGroup &Group : Groups) {
-        // Don't perform more than a certain amount of comparisons.
-        // This should limit the cost of grouping the pointers to something
-        // reasonable. If we do end up hitting this threshold, the algorithm
-        // will create separate groups for all remaining pointers.
-        if (TotalComparisons > MemoryCheckMergeThreshold)
-          break;
-
-        TotalComparisons++;
-
-        if (Group.addPointer(Pointer, *this)) {
-          Merged = true;
-          break;
+      for (unsigned Pointer : PointerI->second) {
+        bool Merged = false;
+        // Mark this pointer as seen.
+        Seen.insert(Pointer);
+
+        // Go through all the existing sets and see if we can find one
+        // which can include this pointer.
+        for (RuntimeCheckingPtrGroup &Group : Groups) {
+          // Don't perform more than a certain amount of comparisons.
+          // This should limit the cost of grouping the pointers to something
+          // reasonable. If we do end up hitting this threshold, the algorithm
+          // will create separate groups for all remaining pointers.
+          if (TotalComparisons > MemoryCheckMergeThreshold)
+            break;
+
+          TotalComparisons++;
+
+          if (Group.addPointer(Pointer, *this)) {
+            Merged = true;
+            break;
+          }
         }
-      }
 
-      if (!Merged)
-        // We couldn't add this pointer to any existing set or the threshold
-        // for the number of comparisons has been reached. Create a new group
-        // to hold the current pointer.
-        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+        if (!Merged)
+          // We couldn't add this pointer to any existing set or the threshold
+          // for the number of comparisons has been reached. Create a new group
+          // to hold the current pointer.
+          Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
+      }
     }
 
     // We've computed the grouped checks for this partition.
@@ -631,11 +635,8 @@
 /// Check whether a pointer can participate in a runtime bounds check.
 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
 /// by adding run-time checks (overflow checks) if necessary.
-static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
-                                const ValueToValueMap &Strides, Value *Ptr,
-                                Loop *L, bool Assume) {
-  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-
+static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
+                                const SCEV *PtrScev, Loop *L, bool Assume) {
   // The bounds for loop-invariant pointer is trivial.
   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
     return true;
@@ -698,34 +699,120 @@
                                           bool Assume) {
   Value *Ptr = Access.getPointer();
 
-  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
-    return false;
+  auto TranslatePointers = [&](Value *Ptr) -> SmallVector<const SCEV *> {
+    ScalarEvolution &SE = *PSE.getSE();
+    auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+    auto AssumeInBoundsFlags = [&]() {
+      if (!GEP->isInBounds())
+        return false;
+
+      // We'd like to propagate flags from the IR to the corresponding
+      // SCEV nodes, but to do that, we have to ensure that said flag is
+      // valid in the entire defined scope of the SCEV.
+      auto *GEPI = dyn_cast<Instruction>(GEP);
+      // TODO: non-instructions have global scope. We might be able to
+      // prove some global scope cases
+      return GEPI && programUndefinedIfPoison(GEPI);
+    };
+
+    if (GEP && GEP->getNumOperands() == 2) {
+      if (auto *SI = dyn_cast<SelectInst>(GEP->getOperand(0))) {
+        const SCEV *BaseA = SE.getSCEV(SI->getOperand(1));
+        const SCEV *BaseB = SE.getSCEV(SI->getOperand(2));
+        const SCEV *Offset = SE.getSCEV(GEP->getOperand(1));
+        if (SE.getTypeSizeInBits(Offset->getType()) <
+            SE.getTypeSizeInBits(BaseA->getType()))
+          Offset = SE.getSignExtendExpr(
+              Offset, SE.getEffectiveSCEVType(BaseA->getType()));
+
+        SCEV::NoWrapFlags OffsetWrap =
+            AssumeInBoundsFlags() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+        Type *IntIdxTy = SE.getEffectiveSCEVType(BaseA->getType());
+        auto *CurTy = GEP->getSourceElementType();
+        const SCEV *ElementSize = SE.getSizeOfExpr(IntIdxTy, CurTy);
+        // Getelementptr indices are signed.
+        Offset = SE.getTruncateOrSignExtend(Offset, IntIdxTy);
+
+        // Multiply the index by the element size to compute the element
+        // offset.
+        Offset = SE.getMulExpr(Offset, ElementSize, OffsetWrap);
+        auto *PtrA = SE.getAddExpr(BaseA, Offset, SCEV::FlagNUW);
+        auto *PtrB = SE.getAddExpr(BaseB, Offset, SCEV::FlagNUW);
+        return {PtrA, PtrB};
+      } else if (match(GEP->getOperand(1),
+                       m_ZExt(m_Select(m_Value(), m_Value(), m_Value())))) {
+        auto *ZExt = cast<ZExtInst>(GEP->getOperand(1));
+        auto *SI = cast<SelectInst>(ZExt->getOperand(0));
+        const SCEV *OffsetA = SE.getSCEV(SI->getOperand(1));
+        const SCEV *OffsetB = SE.getSCEV(SI->getOperand(2));
+
+        SCEV::NoWrapFlags OffsetWrap =
+            AssumeInBoundsFlags() ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+
+        auto *Base = SE.getSCEV(GEP->getOperand(0));
+        Type *IntIdxTy = SE.getEffectiveSCEVType(Base->getType());
+        auto *CurTy = GEP->getSourceElementType();
+        const SCEV *ElementSize = SE.getSizeOfExpr(IntIdxTy, CurTy);
+        // Getelementptr indices are signed.
+        OffsetA = SE.getTruncateOrSignExtend(OffsetA, IntIdxTy);
+        OffsetB = SE.getTruncateOrSignExtend(OffsetB, IntIdxTy);
+
+        // Multiply the index by the element size to compute the element
+        // offset.
+        OffsetA = SE.getMulExpr(OffsetA, ElementSize, OffsetWrap);
+        OffsetB = SE.getMulExpr(OffsetB, ElementSize, OffsetWrap);
+        auto *PtrA = SE.getAddExpr(Base, OffsetA, SCEV::FlagNUW);
+        auto *PtrB = SE.getAddExpr(Base, OffsetB, SCEV::FlagNUW);
+        return {PtrA, PtrB};
+      }
+    }
+    return {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)};
+  };
 
-  // When we run after a failing dependency check we have to make sure
-  // we don't have wrapping pointers.
-  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
-    auto *Expr = PSE.getSCEV(Ptr);
-    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+  SmallVector<const SCEV *> TranslatedPtrs = TranslatePointers(Ptr);
+
+  for (const SCEV *PtrExpr : TranslatedPtrs) {
+    if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
       return false;
-    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+
+    // When we run after a failing dependency check we have to make sure
+    // we don't have wrapping pointers.
+    if (ShouldCheckWrap) {
+      if (TranslatedPtrs.size() > 1) {
+        return false;
+      }
+      if (!isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
+        auto *Expr = PSE.getSCEV(Ptr);
+        if (!Assume || !isa<SCEVAddRecExpr>(Expr))
+          return false;
+        PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+      }
+    }
+    // If there's only one option for Ptr, look it up after bounds and wrap
+    // checking, because assumptions might have been added to PSE.
+    if (TranslatedPtrs.size() == 1)
+      TranslatedPtrs[0] = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
   }
 
-  // The id of the dependence set.
-  unsigned DepId;
+  for (const SCEV *PtrExpr : TranslatedPtrs) {
+    // The id of the dependence set.
+    unsigned DepId;
 
-  if (isDependencyCheckNeeded()) {
-    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
-    unsigned &LeaderId = DepSetId[Leader];
-    if (!LeaderId)
-      LeaderId = RunningDepId++;
-    DepId = LeaderId;
-  } else
-    // Each access has its own dependence set.
-    DepId = RunningDepId++;
+    if (isDependencyCheckNeeded()) {
+      Value *Leader = DepCands.getLeaderValue(Access).getPointer();
+      unsigned &LeaderId = DepSetId[Leader];
+      if (!LeaderId)
+        LeaderId = RunningDepId++;
+      DepId = LeaderId;
+    } else
+      // Each access has its own dependence set.
+      DepId = RunningDepId++;
 
-  bool IsWrite = Access.getInt();
-  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
-  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+    bool IsWrite = Access.getInt();
+    RtCheck.insert(TheLoop, Ptr, PtrExpr, IsWrite, DepId, ASId, PSE);
+    LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
+  }
 
   return true;
 }
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
--- a/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forked-pointers.ll
@@ -5,10 +5,37 @@
 
 ; CHECK-LABEL: function 'forked_ptrs_different_base_same_offset':
 ; CHECK-NEXT:   for.body:
-; CHECK-NEXT:     Report: cannot identify array bounds
-; CHECK-NEXT:     Dependences:
-; CHECK-NEXT:     Run-time memory checks:
-; CHECK-NEXT:     Grouped accesses:
+; CHECK-NEXT:     Memory dependences are safe with run-time checks
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Check 0:
+; CHECK-NEXT:       Comparing group
+; CHECK-NEXT:         %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+; CHECK-NEXT:       Against group
+; CHECK-NEXT:         %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+; CHECK-NEXT:     Check 1:
+; CHECK-NEXT:       Comparing group
+; CHECK-NEXT:         %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+; CHECK-NEXT:       Against group
+; CHECK-NEXT:         %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
+; CHECK-NEXT:     Check 2:
+; CHECK-NEXT:       Comparing group
+; CHECK-NEXT:         %1 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+; CHECK-NEXT:       Against group
+; CHECK-NEXT:         %.sink.in = getelementptr inbounds float, float* %spec.select, i64 %indvars.iv
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-NEXT:       Group
+; CHECK-NEXT:         (Low: %Dest High: (400 + %Dest))
+; CHECK-NEXT:           Member: {%Dest,+,4}<%for.body>
+; CHECK-NEXT:       Group
+; CHECK-NEXT:         (Low: %Preds High: (400 + %Preds))
+; CHECK-NEXT:           Member: {%Preds,+,4}<%for.body>
+; CHECK-NEXT:       Group
+; CHECK-NEXT:         (Low: %Base2 High: (400 + %Base2))
+; CHECK-NEXT:           Member: {%Base2,+,4}<%for.body>
+; CHECK-NEXT:       Group
+; CHECK-NEXT:         (Low: %Base1 High: (400 + %Base1))
+; CHECK-NEXT:           Member: {%Base1,+,4}<%for.body>
 ; CHECK-EMPTY:
 ; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
 ; CHECK-NEXT:     SCEV assumptions:
@@ -50,15 +77,33 @@
 
 ; CHECK-LABEL: function 'forked_ptrs_same_base_different_offset':
 ; CHECK-NEXT:   for.body:
-; CHECK-NEXT:     Report: cannot identify array bounds
-; CHECK-NEXT:     Dependences:
-; CHECK-NEXT:     Run-time memory checks:
-; CHECK-NEXT:     Grouped accesses:
-; CHECK-EMPTY:
-; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
-; CHECK-NEXT:     SCEV assumptions:
+; CHECK-NEXT:     Memory dependences are safe with run-time checks
+; CHECK-NEXT:     Dependences:
+; CHECK-NEXT:     Run-time memory checks:
+; CHECK-NEXT:     Check 0:
+; CHECK-NEXT:       Comparing group ([[GRP1:.+]]):
+; CHECK-NEXT:         %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+; CHECK-NEXT:       Against group ([[GRP2:.+]]):
+; CHECK-NEXT:         %arrayidx = getelementptr inbounds i32, i32* %Preds, i64 %indvars.iv
+; CHECK-NEXT:     Check 1:
+; CHECK-NEXT:       Comparing group ([[GRP1]]):
+; CHECK-NEXT:         %arrayidx5 = getelementptr inbounds float, float* %Dest, i64 %indvars.iv
+; CHECK-NEXT:       Against group ([[GRP3:.+]]):
+; CHECK-NEXT:         %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
+; CHECK-NEXT:         %arrayidx3 = getelementptr inbounds float, float* %Base, i64 %idxprom213
+; CHECK-NEXT:     Grouped accesses:
+; CHECK-NEXT:       Group [[GRP1]]:
+; CHECK-NEXT:         (Low: %Dest High: (400 + %Dest))
+; CHECK-NEXT:           Member: {%Dest,+,4}<%for.body>
+; CHECK-NEXT:       Group [[GRP2]]:
+; CHECK-NEXT:         (Low: %Preds High: (400 + %Preds))
+; CHECK-NEXT:           Member: {%Preds,+,4}<%for.body>
+; CHECK-NEXT:       Group [[GRP3]]:
+; CHECK-NEXT:         (Low: %Base High: (404 + %Base))
+; CHECK-NEXT:           Member: {(4 + %Base),+,4}<%for.body>
+; CHECK-NEXT:           Member: {%Base,+,4}<%for.body>
 ; CHECK-EMPTY:
-; CHECK-NEXT:     Expressions re-written:
+; CHECK-NEXT:     Non vectorizable stores to invariant address were not found in loop.
 
 ;;;; Derived from the following C code
 ;; void forked_ptrs_same_base_different_offset(float *A, float *B, int *C) {
diff --git a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
--- a/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
+++ b/llvm/test/Transforms/LoopVectorize/forked-pointers.ll
@@ -17,22 +17,84 @@
 define dso_local void @forked_ptrs_different_base_same_offset(float* nocapture readonly %Base1, float* nocapture readonly %Base2, float* nocapture %Dest, i32* nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_different_base_same_offset(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[PREDS:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[BASE2:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP10:%.*]] = getelementptr float, float* [[BASE1:%.*]], i64 100
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SCEVGEP4]] to float*
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[DEST]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32*
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[PREDS]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND012:%.*]] = icmp ugt float* [[SCEVGEP7]], [[DEST]]
+; CHECK-NEXT:    [[BOUND113:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE2]]
+; CHECK-NEXT:    [[FOUND_CONFLICT14:%.*]] = and i1 [[BOUND012]], [[BOUND113]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT14]]
+; CHECK-NEXT:    [[BOUND015:%.*]] = icmp ugt float* [[SCEVGEP10]], [[DEST]]
+; CHECK-NEXT:    [[BOUND116:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE1]]
+; CHECK-NEXT:    [[FOUND_CONFLICT17:%.*]] = and i1 [[BOUND015]], [[BOUND116]]
+; CHECK-NEXT:    [[CONFLICT_RDX18:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT17]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float*> poison, float* [[BASE2]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT19:%.*]] = insertelement <4 x float*> poison, float* [[BASE1]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT20:%.*]] = shufflevector <4 x float*> [[BROADCAST_SPLATINSERT19]], <4 x float*> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4, !alias.scope !0
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x float*> [[BROADCAST_SPLAT]], <4 x float*> [[BROADCAST_SPLAT20]]
+; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x float*> [[TMP8]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float*> [[TMP8]], i32 1
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP2]]
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x float*> [[TMP8]], i32 2
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x float*> [[TMP8]], i32 3
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP10]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP12]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP14]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[TMP20:%.*]] = load float, float* [[TMP16]], align 4, !alias.scope !3
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> poison, float [[TMP17]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP18]], i32 1
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i32 2
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <4 x float> [[TMP23]], float [[TMP20]], i32 3
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP26:%.*]] = bitcast float* [[TMP25]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP24]], <4 x float>* [[TMP26]], align 4, !alias.scope !5, !noalias !7
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
-; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2:%.*]], float* [[BASE1:%.*]]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP28]], 0
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[CMP1_NOT]], float* [[BASE2]], float* [[BASE1]]
 ; CHECK-NEXT:    [[DOTSINK_IN:%.*]] = getelementptr inbounds float, float* [[SPEC_SELECT]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    [[DOTSINK:%.*]] = load float, float* [[DOTSINK_IN]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[DOTSINK]], float* [[TMP1]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[DOTSINK]], float* [[TMP29]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
 ;
 entry:
   br label %for.body
@@ -70,26 +132,83 @@
 define dso_local void @forked_ptrs_same_base_different_offset(float* nocapture readonly %Base, float* nocapture %Dest, i32* nocapture readonly %Preds) {
 ; CHECK-LABEL: @forked_ptrs_same_base_different_offset(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK:       vector.memcheck:
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[DEST:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[PREDS:%.*]], i64 100
+; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr float, float* [[BASE:%.*]], i64 101
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[SCEVGEP4]] to float*
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt float* [[TMP0]], [[DEST]]
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[SCEVGEP]] to i32*
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP1]], [[PREDS]]
+; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; CHECK-NEXT:    [[BOUND09:%.*]] = icmp ugt float* [[SCEVGEP7]], [[DEST]]
+; CHECK-NEXT:    [[BOUND110:%.*]] = icmp ugt float* [[SCEVGEP]], [[BASE]]
+; CHECK-NEXT:    [[FOUND_CONFLICT11:%.*]] = and i1 [[BOUND09]], [[BOUND110]]
+; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT11]]
+; CHECK-NEXT:    br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND13:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT14:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4, !alias.scope !12
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw <4 x i32> [[VEC_IND13]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[TMP6:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> [[VEC_IND15]]
+; CHECK-NEXT:    [[TMP7:%.*]] = zext <4 x i32> [[TMP6]] to <4 x i64>
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x i64> [[TMP7]], i32 0
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i64> [[TMP7]], i32 1
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i64> [[TMP7]], i32 2
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i32 3
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP9]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP17:%.*]] = load float, float* [[TMP11]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP18:%.*]] = load float, float* [[TMP13]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP19:%.*]] = load float, float* [[TMP15]], align 4, !alias.scope !15
+; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x float> poison, float [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP21:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP17]], i32 1
+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x float> [[TMP21]], float [[TMP18]], i32 2
+; CHECK-NEXT:    [[TMP23:%.*]] = insertelement <4 x float> [[TMP22]], float [[TMP19]], i32 3
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP23]], <4 x float>* [[TMP25]], align 4, !alias.scope !17, !noalias !19
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT14]] = add <4 x i32> [[VEC_IND13]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL12:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[I_014:%.*]] = phi i32 [ [[BC_RESUME_VAL12]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[PREDS]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[TMP27]], 0
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[ADD]] = add nuw nsw i32 [[I_014]], 1
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP1]]
+; CHECK-NEXT:    [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT:    [[OFFSET_0:%.*]] = select i1 [[CMP1_NOT]], i32 [[ADD]], i32 [[TMP28]]
 ; CHECK-NEXT:    [[IDXPROM213:%.*]] = zext i32 [[OFFSET_0]] to i64
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE:%.*]], i64 [[IDXPROM213]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load float, float* [[ARRAYIDX3]], align 4
-; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store float [[TMP2]], float* [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[BASE]], i64 [[IDXPROM213]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load float, float* [[ARRAYIDX3]], align 4
+; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[DEST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store float [[TMP29]], float* [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
 ;
 entry:
   br label %for.body
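
For reference, a minimal sketch (not part of the patch) of the forked-pointer shape this change targets, written in the same style as the tests above. The names (@forked_base, %A, %B, %dst, %c) are illustrative. The %src address is a getelementptr whose base is a select between two loop-invariant pointers; with this patch, LAA can translate it into the two SCEVs {%A,+,4}<%loop> and {%B,+,4}<%loop> and emit a runtime check for each against the %dst access instead of reporting "cannot identify array bounds":

define void @forked_base(float* %A, float* %B, float* %dst, i32* %c) {
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %c.addr = getelementptr inbounds i32, i32* %c, i64 %iv
  %pred = load i32, i32* %c.addr, align 4
  %cmp = icmp eq i32 %pred, 0
  ; The forked pointer: a GEP whose base is a select between two bases.
  %base = select i1 %cmp, float* %A, float* %B
  %src = getelementptr inbounds float, float* %base, i64 %iv
  %val = load float, float* %src, align 4
  %dst.addr = getelementptr inbounds float, float* %dst, i64 %iv
  store float %val, float* %dst.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, 100
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}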