Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -107,10 +107,11 @@
   splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
 
   /// Checks if there are any instructions which may affect the memory accessed
-  /// in the chain between \p From and \p To. The elements of \p Chain should be
-  /// all loads or all stores.
-  bool isVectorizable(ArrayRef<Value *> Chain, BasicBlock::iterator From,
-                      BasicBlock::iterator To);
+  /// in the chain between \p From and \p To. Return Index, where
+  /// \p Chain[0, Index) is the largest vectorizable chain prefix. The elements
+  /// of \p Chain should be all loads or all stores.
+  unsigned getVectorizableEnd(ArrayRef<Value *> Chain, BasicBlock::iterator From,
+                              BasicBlock::iterator To);
 
   /// Collects load and store instructions to vectorize.
   void collectInstructions(BasicBlock *BB);
@@ -123,10 +124,12 @@
   bool vectorizeInstructions(ArrayRef<Value *> Instrs);
 
   /// Vectorizes the load instructions in Chain.
-  bool vectorizeLoadChain(ArrayRef<Value *> Chain);
+  bool vectorizeLoadChain(ArrayRef<Value *> Chain,
+                          SmallVector<Value *, 16> *InstructionsProcessed);
 
   /// Vectorizes the store instructions in Chain.
-  bool vectorizeStoreChain(ArrayRef<Value *> Chain);
+  bool vectorizeStoreChain(ArrayRef<Value *> Chain,
+                           SmallVector<Value *, 16> *InstructionsProcessed);
 
   /// Check if this load/store access is misaligned accesses
   bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
@@ -347,11 +350,11 @@
       if (!IM || IM->getOpcode() == Instruction::PHI)
         continue;
 
-      if (!DT.dominates(IM, IW)) {
+      if (!DT.dominates(IM, I)) {
         InstructionsToMove.insert(IM);
         Worklist.push_back(IM);
-        DEBUG(assert(IM->getParent() == IW->getParent() &&
-                     "Instructions to move should be in the same basic block"));
+        assert(IM->getParent() == IW->getParent() &&
+               "Instructions to move should be in the same basic block");
       }
     }
   }
@@ -422,9 +425,9 @@
   return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
 }
 
-bool Vectorizer::isVectorizable(ArrayRef<Value *> Chain,
-                                BasicBlock::iterator From,
-                                BasicBlock::iterator To) {
+unsigned Vectorizer::getVectorizableEnd(ArrayRef<Value *> Chain,
+                                        BasicBlock::iterator From,
+                                        BasicBlock::iterator To) {
   SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
   SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
 
@@ -437,19 +440,20 @@
         ChainInstrs.push_back({&*I, Idx});
     } else if (I->mayHaveSideEffects()) {
       DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
-      return false;
+      return 0;
     }
   }
 
   assert(Chain.size() == ChainInstrs.size() &&
          "All instructions in the Chain must exist in [From, To).");
 
-  for (auto EntryMem : MemoryInstrs) {
-    Value *V = EntryMem.first;
-    unsigned VIdx = EntryMem.second;
-    for (auto EntryChain : ChainInstrs) {
-      Value *VV = EntryChain.first;
-      unsigned VVIdx = EntryChain.second;
+  unsigned Index = 0;
+  for (auto EntryChain : ChainInstrs) {
+    Value *VV = EntryChain.first;
+    unsigned VVIdx = EntryChain.second;
+    for (auto EntryMem : MemoryInstrs) {
+      Value *V = EntryMem.first;
+      unsigned VIdx = EntryMem.second;
       if (isa<LoadInst>(V) && isa<LoadInst>(VV))
         continue;
 
@@ -479,12 +483,12 @@
                  << *VV << " aliases " << *Ptr1 << '\n';
         });
 
-        return false;
+        return Index;
       }
     }
+    Index++;
   }
-
-  return true;
+  return Chain.size();
 }
 
 void Vectorizer::collectInstructions(BasicBlock *BB) {
@@ -617,8 +621,16 @@
   bool Changed = false;
   SmallPtrSet<Value *, 16> VectorizedValues;
 
-  for (int Head : Heads) {
-    if (Tails.count(Head))
+  for (unsigned i = 0; i < Heads.size(); i++) {
+    int Head = Heads[i];
+    if (VectorizedValues.count(Instrs[Head]))
+      continue;
+    for (unsigned j = 0; j < Tails.size(); j++)
+      if (Head == Tails[j] && !VectorizedValues.count(Instrs[Heads[j]])) {
+        Head = -1;
+        break;
+      }
+    if (Head < 0)
       continue;
 
     // We found an instr that starts a chain. Now follow the chain and try to
@@ -634,21 +646,24 @@
     }
 
     bool Vectorized = false;
+    SmallVector<Value *, 16> InstructionsProcessed;
     if (isa<LoadInst>(*Operands.begin()))
-      Vectorized = vectorizeLoadChain(Operands);
+      Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
     else
-      Vectorized = vectorizeStoreChain(Operands);
+      Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
 
-    // Mark the vectorized instructions so that we don't vectorize them again.
-    if (Vectorized)
-      VectorizedValues.insert(Operands.begin(), Operands.end());
+    // Mark the processed instructions so that we don't try to
+    // vectorize them again.
+    if (InstructionsProcessed.size())
+      VectorizedValues.insert(InstructionsProcessed.begin(), InstructionsProcessed.end());
     Changed |= Vectorized;
   }
 
   return Changed;
 }
 
-bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeStoreChain(ArrayRef<Value *> Chain,
+                                     SmallVector<Value *, 16> *InstructionsProcessed) {
   StoreInst *S0 = cast<StoreInst>(Chain[0]);
 
   // If the vector has an int element, default to int for the whole load.
@@ -671,8 +686,28 @@
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
 
-  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+    InstructionsProcessed->append(Chain.begin(), Chain.end());
+    return false;
+  }
+
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
+  unsigned StopChain = getVectorizableEnd(Chain, First, Last);
+  if (StopChain == 0) {
+    // There exists a side effect instruction, no vectorization possible.
+    InstructionsProcessed->append(Chain.begin(), Chain.end());
     return false;
+  }
+  if (StopChain == 1) {
+    // Failed after the first instruction. Discard it and try the smaller chain.
+    InstructionsProcessed->push_back(Chain.front());
+    return false;
+  }
+
+  // Update Chain to the valid vectorizable subchain.
+  Chain = Chain.slice(0, StopChain);
+  ChainSize = Chain.size();
 
   // Store size should be 1B, 2B or multiple of 4B.
   // TODO: Target hook for size constraint?
@@ -681,11 +716,11 @@
     DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
                     "or multiple of 4B. Splitting.\n");
     if (SzInBytes == 3)
-      return vectorizeStoreChain(Chain.slice(0, ChainSize - 1));
+      return vectorizeStoreChain(Chain.slice(0, ChainSize - 1), InstructionsProcessed);
 
     auto Chains = splitOddVectorElts(Chain, Sz);
-    return vectorizeStoreChain(Chains.first) |
-           vectorizeStoreChain(Chains.second);
+    return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
+           vectorizeStoreChain(Chains.second, InstructionsProcessed);
   }
 
   VectorType *VecTy;
@@ -701,8 +736,8 @@
   if (ChainSize > VF) {
     DEBUG(dbgs() << "LSV: Vector factor is too big."
                     " Creating two separate arrays.\n");
-    return vectorizeStoreChain(Chain.slice(0, VF)) |
-           vectorizeStoreChain(Chain.slice(VF));
+    return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
+           vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
   }
 
   DEBUG({
@@ -711,6 +746,10 @@
       V->dump();
   });
 
+  // We won't try again to vectorize the elements of the chain, regardless of
+  // whether we succeed below.
+  InstructionsProcessed->append(Chain.begin(), Chain.end());
+
   // Check alignment restrictions.
   unsigned Alignment = getAlignment(S0);
 
@@ -730,12 +769,6 @@
     }
   }
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-
-  if (!isVectorizable(Chain, First, Last))
-    return false;
-
   // Set insert point.
   Builder.SetInsertPoint(&*Last);
 
@@ -783,7 +816,8 @@
   return true;
 }
 
-bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain) {
+bool Vectorizer::vectorizeLoadChain(ArrayRef<Value *> Chain,
+                                    SmallVector<Value *, 16> *InstructionsProcessed) {
   LoadInst *L0 = cast<LoadInst>(Chain[0]);
 
   // If the vector has an int element, default to int for the whole load.
@@ -806,8 +840,28 @@
   unsigned VF = VecRegSize / Sz;
   unsigned ChainSize = Chain.size();
 
-  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2)
+  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
+    InstructionsProcessed->append(Chain.begin(), Chain.end());
+    return false;
+  }
+
+  BasicBlock::iterator First, Last;
+  std::tie(First, Last) = getBoundaryInstrs(Chain);
+  unsigned StopChain = getVectorizableEnd(Chain, First, Last);
+  if (StopChain == 0) {
+    // There exists a side effect instruction, no vectorization possible.
+    InstructionsProcessed->append(Chain.begin(), Chain.end());
+    return false;
+  }
+  if (StopChain == 1) {
+    // Failed after the first instruction. Discard it and try the smaller chain.
+    InstructionsProcessed->push_back(Chain.front());
     return false;
+  }
+
+  // Update Chain to the valid vectorizable subchain.
+  Chain = Chain.slice(0, StopChain);
+  ChainSize = Chain.size();
 
   // Load size should be 1B, 2B or multiple of 4B.
   // TODO: Should size constraint be a target hook?
@@ -816,9 +870,10 @@
     DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
                     "or multiple of 4B. Splitting.\n");
     if (SzInBytes == 3)
-      return vectorizeLoadChain(Chain.slice(0, ChainSize - 1));
+      return vectorizeLoadChain(Chain.slice(0, ChainSize - 1), InstructionsProcessed);
     auto Chains = splitOddVectorElts(Chain, Sz);
-    return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second);
+    return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
+           vectorizeLoadChain(Chains.second, InstructionsProcessed);
   }
 
   VectorType *VecTy;
@@ -834,10 +889,14 @@
   if (ChainSize > VF) {
     DEBUG(dbgs() << "LSV: Vector factor is too big. "
                     "Creating two separate arrays.\n");
-    return vectorizeLoadChain(Chain.slice(0, VF)) |
-           vectorizeLoadChain(Chain.slice(VF));
+    return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
+           vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
   }
 
+  // We won't try again to vectorize the elements of the chain, regardless of
+  // whether we succeed below.
+  InstructionsProcessed->append(Chain.begin(), Chain.end());
+
   // Check alignment restrictions.
   unsigned Alignment = getAlignment(L0);
 
@@ -863,12 +922,6 @@
       V->dump();
   });
 
-  BasicBlock::iterator First, Last;
-  std::tie(First, Last) = getBoundaryInstrs(Chain);
-
-  if (!isVectorizable(Chain, First, Last))
-    return false;
-
   // Set insert point.
   Builder.SetInsertPoint(&*First);
 
Index: test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
===================================================================
--- test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
+++ test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
@@ -48,8 +48,7 @@
 ; CHECK-LABEL: @chain_suffix(
 ; CHECK: load i32
 ; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
 define void @chain_suffix(i32* noalias %ptr) {
   %next.gep = getelementptr i32, i32* %ptr, i64 0
   %next.gep1 = getelementptr i32, i32* %ptr, i64 1
@@ -66,12 +65,9 @@
 
 
 ; CHECK-LABEL: @chain_prefix_suffix(
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <2 x i32>
 ; CHECK: store <2 x i32>
-; CHECK: load i32
-; CHECK: load i32
-; CHECK: load i32
+; CHECK: load <3 x i32>
 define void  @chain_prefix_suffix(i32* noalias %ptr) {
   %next.gep = getelementptr i32, i32* %ptr, i64 0
   %next.gep1 = getelementptr i32, i32* %ptr, i64 1