Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5070,55 +5070,40 @@
 void InterleavedAccessInfo::collectConstStridedAccesses(
     MapVector<Instruction *, StrideDescriptor> &StrideAccesses,
     const ValueToValueMap &Strides) {
-  // Holds load/store instructions in program order.
-  SmallVector<Instruction *, 16> AccessList;
+
+  // The module's data layout.
+  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
 
   // Since it's desired that the load/store instructions be maintained in
   // "program order" for the interleaved access analysis, we have to visit the
   // blocks in the loop in reverse postorder (i.e., in a topological order).
   // Such an ordering will ensure that any load/store that may be executed
-  // before a second load/store will precede the second load/store in the
-  // AccessList.
+  // before a second load/store will precede the second load/store in
+  // StrideAccesses.
   LoopBlocksDFS DFS(TheLoop);
   DFS.perform(LI);
-  for (LoopBlocksDFS::RPOIterator I = DFS.beginRPO(), E = DFS.endRPO(); I != E;
-       ++I) {
-    BasicBlock *BB = *I;
-    bool IsPred = LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
-
-    for (auto &I : *BB) {
-      if (!isa<LoadInst>(&I) && !isa<StoreInst>(&I))
+  for (LoopBlocksDFS::RPOIterator BBI = DFS.beginRPO(), BBE = DFS.endRPO();
+       BBI != BBE; ++BBI)
+    for (auto &I : **BBI) {
+      LoadInst *LI = dyn_cast<LoadInst>(&I);
+      StoreInst *SI = dyn_cast<StoreInst>(&I);
+      if (!LI && !SI)
         continue;
-      // FIXME: Currently we can't handle mixed accesses and predicated accesses
-      if (IsPred)
-        return;
-      AccessList.push_back(&I);
-    }
-  }
+      Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+      int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
 
-  if (AccessList.empty())
-    return;
+      const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
+      PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+      unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
 
-  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  for (auto I : AccessList) {
-    LoadInst *LI = dyn_cast<LoadInst>(I);
-    StoreInst *SI = dyn_cast<StoreInst>(I);
-
-    Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
-    int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
-
-    const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
-    PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
-    unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType());
-
-    // An alignment of 0 means target ABI alignment.
-    unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
-    if (!Align)
-      Align = DL.getABITypeAlignment(PtrTy->getElementType());
+      // An alignment of 0 means target ABI alignment.
+      unsigned Align = LI ? LI->getAlignment() : SI->getAlignment();
+      if (!Align)
+        Align = DL.getABITypeAlignment(PtrTy->getElementType());
 
-    StrideAccesses[I] = StrideDescriptor(Stride, Scev, Size, Align);
-  }
+      StrideAccesses[&I] = StrideDescriptor(Stride, Scev, Size, Align);
+    }
 }
 
 // Analyze interleaved accesses and collect them into interleaved load and
@@ -5231,6 +5216,13 @@
       if (DistanceToA % static_cast<int>(DesA.Size))
         continue;
 
+      // If instruction B is in a predicated block, we can't form an
+      // interleaved group with instruction A unless A and B are in the same
+      // block.
+      if (LoopAccessInfo::blockNeedsPredication(B->getParent(), TheLoop, DT) &&
+          A->getParent() != B->getParent())
+        continue;
+
       // The index of B is the index of A plus the related index to A.
       int IndexB =
           Group->getIndex(A) + DistanceToA / static_cast<int>(DesA.Size);
Index: test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -0,0 +1,107 @@
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+%pair = type { i64, i64 }
+
+; CHECK-LABEL: @load_gap_with_pred_store_0
+;
+; CHECK: min.iters.checked
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+;
+; CHECK: vector.body:
+; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
+; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+; CHECK: %[[Result:.+]] = and <2 x i64> %strided.vec, %{{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X1:.+]] = extractelement <2 x i64> %[[Result]], i32 0
+; CHECK: store i64 %[[X1]], {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X2:.+]] = extractelement <2 x i64> %[[Result]], i32 1
+; CHECK: store i64 %[[X2]], {{.*}}
+
+define void @load_gap_with_pred_store_0(%pair *%p, i64 %x, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
+  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+  %0 = load i64, i64* %p.1, align 8
+  %1 = and i64 %0, %x
+  %2 = icmp eq i64 %1, %x
+  br i1 %2, label %if.then, label %if.merge
+
+if.then:
+  store i64 %1, i64* %p.1, align 8
+  br label %if.merge
+
+if.merge:
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
+
+; CHECK-LABEL: @load_gap_with_pred_store_1
+;
+; CHECK: min.iters.checked
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+;
+; CHECK: vector.body:
+; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
+; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+; CHECK: %[[Result:.+]] = and <2 x i64> %strided.vec, %{{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X1:.+]] = extractelement <2 x i64> %[[Result]], i32 0
+; CHECK: store i64 %[[X1]], {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X2:.+]] = extractelement <2 x i64> %[[Result]], i32 1
+; CHECK: store i64 %[[X2]], {{.*}}
+;
+; CHECK: pred.store.continue
+; CHECK: %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
+; CHECK: store i64 %[[X3]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
+; CHECK: store i64 %[[X4]], {{.*}}
+
+define void @load_gap_with_pred_store_1(%pair *%p, i64 %x, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
+  %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
+  %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+  %0 = load i64, i64* %p.1, align 8
+  %1 = and i64 %0, %x
+  %2 = icmp eq i64 %1, %x
+  br i1 %2, label %if.then, label %if.merge
+
+if.then:
+  store i64 %1, i64* %p.0, align 8
+  br label %if.merge
+
+if.merge:
+  %3 = load i64, i64* %p.0, align 8
+  store i64 %3, i64* %p.1, align 8
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  ret void
+}
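
For reference, a rough C analogue of the pattern the first new test exercises (this snippet is only an illustration and is not part of the patch; the struct and function names are made up): each iteration reads one field of a pair, which is a strided load with a gap, and writes back only under a condition, which becomes a predicated store after vectorization. The old code bailed out of interleaved-access analysis as soon as it saw a predicated block; with this patch the gapped load can still form an interleave group while the conditional store is scalarized and predicated.

/* Illustrative only: an assumed C equivalent of @load_gap_with_pred_store_0.
   The IR above is in rotated (do-while) form, so assume n > 0. */
struct pair { long long x, y; };

void update_pairs(struct pair *p, long long x, long long n) {
  for (long long i = 0; i < n; ++i) {
    long long v = p[i].y & x;  /* strided load of field y, gap at field x */
    if (v == x)
      p[i].y = v;              /* conditional (predicated) store */
  }
}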