Index: lib/Transforms/Scalar/LICM.cpp
===================================================================
--- lib/Transforms/Scalar/LICM.cpp
+++ lib/Transforms/Scalar/LICM.cpp
@@ -31,6 +31,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AliasSetTracker.h"
@@ -70,6 +71,7 @@
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
 #include <algorithm>
+#include <list>
 #include <utility>
 using namespace llvm;
 
@@ -103,7 +105,7 @@
                                   const LoopSafetyInfo *SafetyInfo,
                                   TargetTransformInfo *TTI, bool &FreeInLoop);
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
-                  LoopSafetyInfo *SafetyInfo,
+                  BasicBlock *Preheader, LoopSafetyInfo *SafetyInfo,
                   OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
                  const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
@@ -433,6 +435,198 @@
   return Changed;
 }
 
+// This is a helper class for hoistRegion to make it able to hoist control flow
+// in order to be able to hoist phis. The way this works is that we initially
+// start hoisting to the loop preheader, and when we see a loop invariant branch
+// we make note of this. When we then come to hoist an instruction that's
+// conditional on such a branch we duplicate the branch and the relevant control
+// flow, then hoist the instruction into the block corresponding to its original
+// block in the duplicated control flow.
+class ControlFlowHoister {
+private:
+  // Information about the loop we are hoisting from
+  LoopInfo *LI;
+  DominatorTree *DT;
+  Loop *CurLoop;
+
+  // A map of blocks in the loop to the block their instructions will be hoisted
+  // to.
+  DenseMap<BasicBlock *, BasicBlock *> HoistDestinationMap;
+
+  // The branches that we can hoist, mapped to the block that marks a
+  // convergence point of their control flow.
+  DenseMap<BranchInst *, BasicBlock *> HoistableBranches;
+
+  // We make a copy of the dominator tree children of the original preheader as
+  // we will need it whenever we insert a new preheader.
+  std::vector<DomTreeNode *> OriginalPreheaderChildren;
+
+public:
+  ControlFlowHoister(LoopInfo *LI, DominatorTree *DT, Loop *CurLoop)
+      : LI(LI), DT(DT), CurLoop(CurLoop) {
+    BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader();
+    OriginalPreheaderChildren = DT->getNode(OriginalPreheader)->getChildren();
+  }
+
+  void registerPossiblyHoistableBranch(BranchInst *BI) {
+    // We can only hoist conditional branches with loop variant operands where
+    // both branch destinations are in the loop.
+    if (!BI->isConditional() || !CurLoop->hasLoopInvariantOperands(BI) ||
+        !CurLoop->contains(BI->getSuccessor(0)) ||
+        !CurLoop->contains(BI->getSuccessor(1)))
+      return;
+
+    // We can hoist BI if one branch destination is the successor of the other,
+    // or both have common successor. Find the common successor by taking the
+    // intersection of the two successor sets, adding the block itself to its
+    // successors to handle the case where one is the successor of the other.
+    // TODO: This could be expanded to allowing branches where both ends
+    // eventually converge to a single block.
+    BasicBlock *TrueDest = BI->getSuccessor(0);
+    BasicBlock *FalseDest = BI->getSuccessor(1);
+    SmallPtrSet<BasicBlock *, 4> TrueDestSucc = {TrueDest};
+    SmallPtrSet<BasicBlock *, 4> FalseDestSucc = {FalseDest};
+    for (BasicBlock *SuccBB : successors(TrueDest))
+      TrueDestSucc.insert(SuccBB);
+    for (BasicBlock *SuccBB : successors(FalseDest))
+      FalseDestSucc.insert(SuccBB);
+    set_intersect(TrueDestSucc, FalseDestSucc);
+    if (!TrueDestSucc.empty()) {
+      BasicBlock *CommonSucc = *TrueDestSucc.begin();
+      // The branch can't be a loop backedge, which we check by checking if the
+      // branch dominates the common successor.
+      // TODO: It would be better to check CommonSucc != CurLoop->getHeader(),
+      // but doing the check this way also avoids certain kinds of control flow
+      // (e.g. three-way conditions), that getHoistedBlock doesn't currently
+      // handle correctly.
+      if (DT->dominates(BI, CommonSucc))
+        HoistableBranches[BI] = CommonSucc;
+    }
+  }
+
+  bool canHoistPHI(PHINode *PN) {
+    // We can hoist phis if the block they are in is the target of hoistable
+    // branches which cover all of the predecessors of the block.
+    SmallPtrSet<BasicBlock *, 8> PredecessorBlocks;
+    BasicBlock *BB = PN->getParent();
+    for (BasicBlock *PredBB : predecessors(BB))
+      PredecessorBlocks.insert(PredBB);
+    for (auto &Pair : HoistableBranches) {
+      if (Pair.second == BB) {
+        // Which blocks are predecessors via this branch depends on if the
+        // branch is triangle-like or diamond-like.
+        if (Pair.first->getSuccessor(0) == BB) {
+          PredecessorBlocks.erase(Pair.first->getParent());
+          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+        } else if (Pair.first->getSuccessor(1) == BB) {
+          PredecessorBlocks.erase(Pair.first->getParent());
+          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+        } else {
+          PredecessorBlocks.erase(Pair.first->getSuccessor(0));
+          PredecessorBlocks.erase(Pair.first->getSuccessor(1));
+        }
+      }
+    }
+    // PredecessorBlocks will now be empty if for every predecessor of BB we
+    // found a hoistable branch source.
+    return PredecessorBlocks.empty();
+  }
+
+  BasicBlock *getHoistedBlock(BasicBlock *BB) {
+    // If BB has already been hoisted, return that
+    if (HoistDestinationMap.count(BB))
+      return HoistDestinationMap[BB];
+
+    // Check if this block is conditional based on a pending branch
+    BranchInst *BI = nullptr;
+    for (auto Pair : HoistableBranches) {
+      if ((Pair.first->getSuccessor(0) == BB && BB != Pair.second) ||
+          (Pair.first->getSuccessor(1) == BB && BB != Pair.second)) {
+        assert(!BI && "BB is expected to be the target of at most one branch");
+        BI = Pair.first;
+      }
+    }
+
+    // If not involved in a pending branch, hoist to preheader
+    BasicBlock *InitialPreheader = CurLoop->getLoopPreheader();
+    if (!BI) {
+      LLVM_DEBUG(dbgs() << "LICM using " << InitialPreheader->getName()
+                        << " as hoist destination for " << BB->getName()
+                        << "\n");
+      HoistDestinationMap[BB] = InitialPreheader;
+      return InitialPreheader;
+    }
+
+    LLVMContext &C = BB->getContext();
+    BasicBlock *TrueDest = BI->getSuccessor(0);
+    BasicBlock *FalseDest = BI->getSuccessor(1);
+    BasicBlock *CommonSucc = HoistableBranches[BI];
+    BasicBlock *HoistTarget = getHoistedBlock(BI->getParent());
+
+    // Create hoisted versions of blocks that currently don't have them
+    auto CreateHoistedBlock = [&](BasicBlock *Orig) {
+      if (HoistDestinationMap[Orig])
+        return HoistDestinationMap[Orig];
+      BasicBlock *New =
+          BasicBlock::Create(C, Orig->getName() + ".licm", Orig->getParent());
+      HoistDestinationMap[Orig] = New;
+      DT->addNewBlock(New, HoistTarget);
+      if (CurLoop->getParentLoop())
+        CurLoop->getParentLoop()->addBasicBlockToLoop(New, *LI);
+      LLVM_DEBUG(dbgs() << "LICM created " << New->getName()
+                        << " as hoist destination for " << Orig->getName()
+                        << "\n");
+      return New;
+    };
+    BasicBlock *HoistTrueDest = CreateHoistedBlock(TrueDest);
+    BasicBlock *HoistFalseDest = CreateHoistedBlock(FalseDest);
+    BasicBlock *HoistCommonSucc = CreateHoistedBlock(CommonSucc);
+
+    // Link up these blocks with branches
+    if (!HoistCommonSucc->getTerminator()) {
+      // The new common successor we've generated will branch to whatever that
+      // hoist target branched to.
+      BasicBlock *TargetSucc = HoistTarget->getSingleSuccessor();
+      assert(TargetSucc && "Expected hoist target to have a single successor");
+      HoistCommonSucc->moveBefore(TargetSucc);
+      BranchInst::Create(TargetSucc, HoistCommonSucc);
+    }
+    if (!HoistTrueDest->getTerminator()) {
+      HoistTrueDest->moveBefore(HoistCommonSucc);
+      BranchInst::Create(HoistCommonSucc, HoistTrueDest);
+    }
+    if (!HoistFalseDest->getTerminator()) {
+      HoistFalseDest->moveBefore(HoistCommonSucc);
+      BranchInst::Create(HoistCommonSucc, HoistFalseDest);
+    }
+
+    // If BI is being hoisted to what was originally the preheader then
+    // HoistCommonSucc will now be the new preheader.
+    if (HoistTarget == InitialPreheader) {
+      // Phis in the loop header now need to use the new preheader
+      InitialPreheader->replaceSuccessorsPhiUsesWith(HoistCommonSucc);
+      // The new preheader dominates what the original preheader dominated.
+      DomTreeNode *PreheaderNode = DT->getNode(HoistCommonSucc);
+      for (DomTreeNode *Child : OriginalPreheaderChildren)
+        DT->changeImmediateDominator(Child, PreheaderNode);
+      // The preheader hoist destination is now the new preheader, with the
+      // exception of the hoist destination of this branch.
+      for (auto &Pair : HoistDestinationMap)
+        if (Pair.second == InitialPreheader && Pair.first != BI->getParent())
+          Pair.second = HoistCommonSucc;
+    }
+
+    // Now finally hoist BI
+    ReplaceInstWithInst(
+        HoistTarget->getTerminator(),
+        BranchInst::Create(HoistTrueDest, HoistFalseDest, BI->getCondition()));
+
+    assert(CurLoop->getLoopPreheader() &&
+           "Hoisting blocks should not have destroyed preheader");
+    return HoistDestinationMap[BB];
+  }
+};
+
 /// Walk the specified region of the CFG (defined by all blocks dominated by
 /// the specified block, and that are in the current loop) in depth first
 /// order w.r.t the DominatorTree.  This allows us to visit definitions before
@@ -447,13 +641,39 @@
          CurLoop != nullptr && CurAST != nullptr && SafetyInfo != nullptr &&
          "Unexpected input to hoistRegion");
 
+  ControlFlowHoister CFH(LI, DT, CurLoop);
+
+  // Keep track of instructions that have been hoisted, as they may need to be
+  // re-hoisted if they end up not dominating all of their uses.
+  std::list<Instruction *> HoistedInstructions;
+
+  // Record what the original preheader is, as we'll need it later if we need to
+  // re-hoist instructions.
+  BasicBlock *OriginalPreheader = CurLoop->getLoopPreheader();
+
   // We want to visit parents before children. We will enque all the parents
   // before their children in the worklist and process the worklist in order.
   SmallVector<DomTreeNode *, 16> Worklist = collectChildrenInLoop(N, CurLoop);
 
   bool Changed = false;
-  for (DomTreeNode *DTN : Worklist) {
-    BasicBlock *BB = DTN->getBlock();
+  while (Worklist.size() > 0) {
+    // For PHI hoisting to work we need to hoist blocks before their successors.
+    // Do this by picking the first block that has no pending predecessors.
+    unsigned int idx = 0;
+    for (unsigned int i = 0; i < Worklist.size(); ++i) {
+      auto IsPredecessorOfWorklistI = [&](DomTreeNode *P) {
+        BasicBlock *BB1 = Worklist[i]->getBlock();
+        BasicBlock *BB2 = P->getBlock();
+        return BB1 != BB2 && llvm::is_contained(predecessors(BB1), BB2);
+      };
+      if (!llvm::any_of(Worklist, IsPredecessorOfWorklistI)) {
+        idx = i;
+        break;
+      }
+    }
+    BasicBlock *BB = Worklist[idx]->getBlock();
+    Worklist.erase(Worklist.begin() + idx);
+
     // Only need to process the contents of this block if it is not part of a
     // subloop (which would already have been processed).
     if (inSubLoop(BB, CurLoop, LI))
@@ -491,14 +711,17 @@
       // Try hoisting the instruction out to the preheader.  We can only do
       // this if all of the operands of the instruction are loop invariant and
       // if it is safe to hoist the instruction.
-      //
+      // TODO: It may be safe to hoist if we are hoisting to a conditional block
+      // and we have accurately duplicated the control flow from the loop header
+      // to that block.
       if (CurLoop->hasLoopInvariantOperands(&I) &&
           canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, true, ORE) &&
           (IsMustExecute ||
            isSafeToExecuteUnconditionally(
                I, DT, CurLoop, SafetyInfo, ORE,
                CurLoop->getLoopPreheader()->getTerminator()))) {
-        hoist(I, DT, CurLoop, SafetyInfo, ORE);
+        hoist(I, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE);
+        HoistedInstructions.push_front(&I);
         Changed = true;
         continue;
       }
@@ -521,7 +744,9 @@
         I.replaceAllUsesWith(Product);
         I.eraseFromParent();
 
-        hoist(*ReciprocalDivisor, DT, CurLoop, SafetyInfo, ORE);
+        hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getHoistedBlock(BB),
+              SafetyInfo, ORE);
+        HoistedInstructions.push_front(ReciprocalDivisor);
         Changed = true;
         continue;
       }
@@ -532,11 +757,31 @@
            isGuard(&I)) &&
           IsMustExecute && IsMemoryNotModified &&
           CurLoop->hasLoopInvariantOperands(&I)) {
-        hoist(I, DT, CurLoop, SafetyInfo, ORE);
+        hoist(I, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE);
+        HoistedInstructions.push_front(&I);
         Changed = true;
         continue;
       }
 
+      if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+        if (CurLoop->hasLoopInvariantOperands(PN) && CFH.canHoistPHI(PN)) {
+          // Redirect incoming blocks first to ensure that we create hoisted
+          // versions of those blocks before we hoist the phi.
+          for (unsigned int i = 0; i < PN->getNumIncomingValues(); ++i)
+            PN->setIncomingBlock(i,
+                                 CFH.getHoistedBlock(PN->getIncomingBlock(i)));
+          hoist(*PN, DT, CurLoop, CFH.getHoistedBlock(BB), SafetyInfo, ORE);
+          assert(DT->dominates(PN, BB) && "Conditional PHIs not expected");
+          Changed = true;
+          continue;
+        }
+      }
+
+      // Remember possibly hoistable branches so we can actually hoist them
+      // later if needed.
+      if (BranchInst *BI = dyn_cast<BranchInst>(&I))
+        CFH.registerPossiblyHoistableBranch(BI);
+
       if (IsMustExecute)
         IsMustExecute = isGuaranteedToTransferExecutionToSuccessor(&I);
       if (IsMemoryNotModified)
@@ -544,6 +789,22 @@
     }
   }
 
+  // If we hoisted instructions to a conditional block they may not dominate
+  // their uses that weren't hoisted. If so make them unconditional by moving
+  // them to the end of the original preheader, which is guaranteed to dominate
+  // everything in the loop. Instructions are pushed to the front of the list,
+  // so we iterate through them in first-in-last-out order which ensures that
+  // when we hoist an instruction we hoist its operands.
+  Instruction *HoistPoint = OriginalPreheader->getTerminator();
+  for (Instruction *I : HoistedInstructions) {
+    if (!llvm::all_of(I->uses(), [&](Use &U) { return DT->dominates(I, U); })) {
+      LLVM_DEBUG(dbgs() << "LICM rehoisting to " << OriginalPreheader->getName()
+                        << ": " << *I << "\n");
+      I->moveBefore(HoistPoint);
+      HoistPoint = I;
+    }
+  }
+
   return Changed;
 }
 
@@ -1099,8 +1360,8 @@
 /// is safe to hoist, this instruction is called to do the dirty work.
 ///
 static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
-                  LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE) {
-  auto *Preheader = CurLoop->getLoopPreheader();
+                  BasicBlock *Preheader, LoopSafetyInfo *SafetyInfo,
+                  OptimizationRemarkEmitter *ORE) {
   LLVM_DEBUG(dbgs() << "LICM hoisting to " << Preheader->getName() << ": " << I
                     << "\n");
   ORE->emit([&]() {
@@ -1119,8 +1380,13 @@
       !isGuaranteedToExecute(I, DT, CurLoop, SafetyInfo))
     I.dropUnknownNonDebugMetadata();
 
-  // Move the new node to the Preheader, before its terminator.
-  I.moveBefore(Preheader->getTerminator());
+  if (isa<PHINode>(I)) {
+    // Move the new node to the end of the phi list in the preheader.
+    I.moveBefore(Preheader->getFirstNonPHI());
+  } else {
+    // Move the new node to the Preheader, before its terminator.
+    I.moveBefore(Preheader->getTerminator());
+  }
 
   // Do not retain debug locations when we are moving instructions to different
   // basic blocks, because we want to avoid jumpy line tables. Calls, however,
Index: test/Transforms/LICM/hoist-phi.ll
===================================================================
--- /dev/null
+++ test/Transforms/LICM/hoist-phi.ll
@@ -0,0 +1,873 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -passes='require<opt-remark-emit>,loop(licm)' -S < %s | FileCheck %s
+
+; CHECK-LABEL: @triangle_phi
+define void @triangle_phi(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: %add = add i32 %x, 1
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: phi i32 [ %add, %[[IF_LICM]] ], [ %x, %entry ]
+; CHECK: store i32 %phi, i32* %p
+; CHECK: %cmp2 = icmp ne i32 %phi, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  %add = add i32 %x, 1
+  br label %then
+
+then:
+  %phi = phi i32 [ %add, %if ], [ %x, %loop ]
+  store i32 %phi, i32* %p
+  %cmp2 = icmp ne i32 %phi, 0
+  br i1 %cmp2, label %loop, label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @diamond_phi
+define void @diamond_phi(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: %add = add i32 %x, 1
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]
+; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ]
+; CHECK: store i32 %phi, i32* %p
+; CHECK: %cmp2 = icmp ne i32 %phi, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %add = add i32 %x, 1
+  br label %then
+
+else:
+  %sub = sub i32 %x, 1
+  br label %then
+
+then:
+  %phi = phi i32 [ %add, %if ], [ %sub, %else ]
+  store i32 %phi, i32* %p
+  %cmp2 = icmp ne i32 %phi, 0
+  br i1 %cmp2, label %loop, label %end
+
+end:
+  ret void
+}
+
+; This is currently too complicated for us to be able to hoist the phi.
+; CHECK-LABEL: @three_way_phi
+define void @three_way_phi(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp2 = icmp sgt i32 %add, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]]
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: br label %loop
+
+entry:
+  br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 %add, 0
+  br i1 %cmp2, label %if.if, label %then
+
+if.if:
+  %sub = sub i32 %x, 1
+  br label %then
+
+then:
+  %phi = phi i32 [ 0, %loop ], [ %add, %if ], [ %sub, %if.if ]
+  store i32 %phi, i32* %p
+  %cmp3 = icmp ne i32 %phi, 0
+  br i1 %cmp3, label %loop, label %end
+
+end:
+  ret void
+}
+
+; This is currently too complicated for us to be able to hoist the phi.
+; CHECK-LABEL: @tree_phi
+define void @tree_phi(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp2 = icmp sgt i32 %add, 0
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: br label %loop
+
+entry:
+  br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 %add, 0
+  br i1 %cmp2, label %if.if, label %if.else
+
+if.if:
+  br label %then
+
+if.else:
+  br label %then
+
+else:
+  %sub = sub i32 %x, 1
+  br label %then
+
+then:
+  %phi = phi i32 [ %add, %if.if ], [ 0, %if.else ], [ %sub, %else ]
+  store i32 %phi, i32* %p
+  %cmp3 = icmp ne i32 %phi, 0
+  br i1 %cmp3, label %loop, label %end
+
+end:
+  ret void
+}
+
+; We can hoist the first phi, but not the second.
+; CHECK-LABEL: @phi_phi
+define void @phi_phi(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp2 = icmp sgt i32 %add, 0
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: br i1 %cmp2, label %[[IF_IF_LICM:.*]], label %[[IF_ELSE_LICM:.*]]
+
+; CHECK: [[IF_IF_LICM]]:
+; CHECK: br label %[[IF_THEN_LICM:.*]]
+
+; CHECK: [[IF_ELSE_LICM]]:
+; CHECK: br label %[[IF_THEN_LICM]]
+
+; CHECK: [[IF_THEN_LICM]]:
+; CHECK: %phi1 = phi i32 [ %add, %[[IF_IF_LICM]] ], [ 0, %[[IF_ELSE_LICM]] ]
+; CHECK: br label %loop
+
+entry:
+  br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 %add, 0
+  br i1 %cmp2, label %if.if, label %if.else
+
+if.if:
+  br label %if.then
+
+if.else:
+  br label %if.then
+
+if.then:
+  %phi1 = phi i32 [ %add, %if.if ], [ 0, %if.else ]
+  br label %then
+
+else:
+  %sub = sub i32 %x, 1
+  br label %then
+
+then:
+  %phi2 = phi i32 [ %phi1, %if.then ], [ %sub, %else ]
+  store i32 %phi2, i32* %p
+  %cmp3 = icmp ne i32 %phi2, 0
+  br i1 %cmp3, label %loop, label %end
+
+end:
+  ret void
+}
+
+; Check that we correctly duplicate empty control flow.
+; CHECK-LABEL: @empty_triangle_phi
+define i8 @empty_triangle_phi(i32 %x, i32 %y) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp eq i32 %x, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ]
+; CHECK: %cmp2 = icmp eq i32 %y, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp eq i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %loop ]
+  %cmp2 = icmp eq i32 %y, 0
+  br i1 %cmp2, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; CHECK-LABEL: @empty_diamond_phi
+define i8 @empty_diamond_phi(i32 %x, i32 %y) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp eq i32 %x, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ]
+; CHECK: %cmp2 = icmp eq i32 %y, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp eq i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  br label %then
+
+else:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %else ]
+  %cmp2 = icmp eq i32 %y, 0
+  br i1 %cmp2, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; Check that we correctly handle the case that the first thing we try to hoist is a phi.
+; CHECK-LABEL: @empty_triangle_phi_first
+define i8 @empty_triangle_phi_first(i32 %x, i1 %cond) {
+; CHECK-LABEL: entry:
+; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ]
+; CHECK: %cmp = icmp eq i32 %x, 0
+; CHECK: br label %loop
+
+loop:
+  br i1 %cond, label %if, label %then
+
+if:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %loop ]
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; CHECK-LABEL: @empty_diamond_phi
+define i8 @empty_diamond_phi_first(i32 %x, i1 %cond) {
+; CHECK-LABEL: entry:
+; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ]
+; CHECK: %cmp = icmp eq i32 %x, 0
+; CHECK: br label %loop
+
+loop:
+  br i1 %cond, label %if, label %else
+
+if:
+  br label %then
+
+else:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %else ]
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; CHECK-LABEL: @empty_triangle_phi_first
+define i8 @empty_triangle_phi_first_empty_loop_head(i32 %x, i1 %cond) {
+; CHECK-LABEL: entry:
+; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %entry ]
+; CHECK: %cmp = icmp eq i32 %x, 0
+; CHECK: br label %loop
+
+loop:
+  br label %test
+
+test:
+  br i1 %cond, label %if, label %then
+
+if:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %test ]
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; CHECK-LABEL: @empty_diamond_phi_first_empty_loop_head
+define i8 @empty_diamond_phi_first_empty_loop_head(i32 %x, i1 %cond) {
+; CHECK-LABEL: entry:
+; CHECK: br i1 %cond, label %[[IF_LICM:.*]], label %[[ELSE_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i8 [ 0, %[[IF_LICM]] ], [ 1, %[[ELSE_LICM]] ]
+; CHECK: %cmp = icmp eq i32 %x, 0
+; CHECK: br label %loop
+
+loop:
+  br label %test
+
+test:
+  br i1 %cond, label %if, label %else
+
+if:
+  br label %then
+
+else:
+  br label %then
+
+then:
+  %phi = phi i8 [ 0, %if ], [ 1, %else ]
+  %cmp = icmp eq i32 %x, 0
+  br i1 %cmp, label %end, label %loop
+
+end:
+  ret i8 %phi
+}
+
+; The phi is on one branch of a diamond while simultaneously at the end of a
+; triangle. Check that we duplicate the triangle and not the diamond.
+; CHECK-LABEL: @triangle_diamond
+define void @triangle_diamond(i32* %ptr, i32 %x, i32 %y) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp ne i32 %x, 0
+; CHECK: %cmp2 = icmp ne i32 %y, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i32 [ 0, %[[IF_LICM]] ], [ 127, %entry ]
+
+loop:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  %cmp2 = icmp ne i32 %y, 0
+  br i1 %cmp2, label %if.then, label %then
+
+then:
+  %phi = phi i32 [ 0, %if ], [ 127, %loop ]
+  store i32 %phi, i32* %ptr
+  br label %end
+
+if.then:
+  br label %end
+
+end:
+  br label %loop
+}
+
+; As the previous, but the end of the diamond is the head of the loop.
+; CHECK-LABEL: @triangle_diamond_backedge
+define void @triangle_diamond_backedge(i32* %ptr, i32 %x, i32 %y) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp ne i32 %x, 0
+; CHECK: %cmp2 = icmp ne i32 %y, 0
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i32 [ 0, %[[IF_LICM]] ], [ 127, %entry ]
+
+loop:
+  %cmp1 = icmp ne i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  %cmp2 = icmp ne i32 %y, 0
+  br i1 %cmp2, label %backedge, label %then
+
+then:
+  %phi = phi i32 [ 0, %if ], [ 127, %loop ]
+  store i32 %phi, i32* %ptr
+  br label %loop
+
+backedge:
+  br label %loop
+}
+
+; The inner diamonds can be hoisted, but not currently the outer diamond
+; CHECK-LABEL: @diamonds_inside_diamond
+define void @diamonds_inside_diamond(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %cmp2 = icmp sgt i32 %x, 10
+; CHECK: %cmp3 = icmp slt i32 %x, -10
+; CHECK: br i1 %cmp2, label %[[IF_IF_LICM:.*]], label %[[IF_ELSE_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_IF_LICM]]:
+; CHECK: br label %[[IF_THEN_LICM:.*]]
+
+; CHECK: [[IF_ELSE_LICM]]:
+; CHECK: br label %[[IF_THEN_LICM]]
+
+; CHECK: [[IF_THEN_LICM]]:
+; CHECK: %phi1 = phi i32 [ 0, %[[IF_IF_LICM]] ], [ 1, %[[IF_ELSE_LICM]] ]
+; CHECK: br i1 %cmp3, label %[[ELSE_IF_LICM:.*]], label %[[ELSE_ELSE_LICM:.*]]
+
+; CHECK: [[ELSE_IF_LICM]]:
+; CHECK: br label %[[ELSE_THEN_LICM:.*]]
+
+; CHECK: [[ELSE_ELSE_LICM]]:
+; CHECK: br label %[[ELSE_THEN_LICM]]
+
+; CHECK: [[ELSE_THEN_LICM]]:
+; CHECK: %phi2 = phi i32 [ 2, %[[ELSE_IF_LICM]] ], [ 3, %[[ELSE_ELSE_LICM]] ]
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %cmp2 = icmp sgt i32 %x, 10
+  br i1 %cmp2, label %if.if, label %if.else
+
+if.if:
+  br label %if.then
+
+if.else:
+  br label %if.then
+
+if.then:
+  %phi1 = phi i32 [ 0, %if.if ], [ 1, %if.else ]
+  br label %then
+
+else:
+  %cmp3 = icmp slt i32 %x, -10
+  br i1 %cmp3, label %else.if, label %else.else
+
+else.if:
+  br label %else.then
+
+else.else:
+  br label %else.then
+
+else.then:
+  %phi2 = phi i32 [ 2, %else.if ], [ 3, %else.else ]
+  br label %then
+
+then:
+  %phi3 = phi i32 [ %phi1, %if.then ], [ %phi2, %else.then ]
+  store i32 %phi3, i32* %p
+  %cmp4 = icmp ne i32 %phi3, 0
+  br i1 %cmp4, label %loop, label %end
+
+end:
+  ret void
+}
+
+; We can hoist blocks that contain an edge that exits the loop by ignoring that
+; edge in the hoisted block.
+; CHECK-LABEL: @triangle_phi_loopexit
+define void @triangle_phi_loopexit(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %cmp2 = icmp sgt i32 10, %add
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]:
+; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %x, %entry ]
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %then
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 10, %add
+  br i1 %cmp2, label %then, label %end
+
+then:
+  %phi = phi i32 [ %add, %if ], [ %x, %loop ]
+  store i32 %phi, i32* %p
+  %cmp3 = icmp ne i32 %phi, 0
+  br i1 %cmp3, label %loop, label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @diamond_phi_oneloopexit
+define void @diamond_phi_oneloopexit(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %cmp2 = icmp sgt i32 10, %add
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]
+; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ]
+; CHECK: %cmp3 = icmp ne i32 %phi, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 10, %add
+  br i1 %cmp2, label %then, label %end
+
+else:
+  %sub = sub i32 %x, 1
+  br label %then
+
+then:
+  %phi = phi i32 [ %add, %if ], [ %sub, %else ]
+  store i32 %phi, i32* %p
+  %cmp3 = icmp ne i32 %phi, 0
+  br i1 %cmp3, label %loop, label %end
+
+end:
+  ret void
+}
+
+; CHECK-LABEL: @diamond_phi_twoloopexit
+define void @diamond_phi_twoloopexit(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %sub = sub i32 %x, 1
+; CHECK: %add = add i32 %x, 1
+; CHECK: %cmp1 = icmp sgt i32 %x, 0
+; CHECK: %cmp2 = icmp sgt i32 10, %add
+; CHECK: %cmp3 = icmp sgt i32 10, %sub
+; CHECK: br i1 %cmp1, label %[[IF_LICM:.*]], label %[[THEN_LICM:.*]]
+entry:
+  br label %loop
+
+; CHECK: [[IF_LICM]]:
+; CHECK: br label %[[THEN_LICM:.*]]
+
+; CHECK: [[ELSE_LICM]]:
+; CHECK: br label %[[THEN_LICM]]
+
+; CHECK: [[THEN_LICM]]
+; CHECK: %phi = phi i32 [ %add, %[[IF_LICM]] ], [ %sub, %[[ELSE_LICM]] ]
+; CHECK: %cmp4 = icmp ne i32 %phi, 0
+; CHECK: br label %loop
+
+loop:
+  %cmp1 = icmp sgt i32 %x, 0
+  br i1 %cmp1, label %if, label %else
+
+if:
+  %add = add i32 %x, 1
+  %cmp2 = icmp sgt i32 10, %add
+  br i1 %cmp2, label %then, label %end
+
+else:
+  %sub = sub i32 %x, 1
+  %cmp3 = icmp sgt i32 10, %sub
+  br i1 %cmp3, label %then, label %end
+
+then:
+  %phi = phi i32 [ %add, %if ], [ %sub, %else ]
+  store i32 %phi, i32* %p
+  %cmp4 = icmp ne i32 %phi, 0
+  br i1 %cmp4, label %loop, label %end
+
+end:
+  ret void
+}
+
+; The store cannot be hoisted, so add and shr cannot be hoisted into a
+; conditional block.
+; CHECK-LABEL: @conditional_use
+define void @conditional_use(i32 %x, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK: %cond = icmp ugt i32 %x, 0
+; CHECK: %add = add i32 %x, 5
+; CHECK: %shr = ashr i32 %add, 1
+; CHECK: br label %loop
+entry:
+  br label %loop
+
+loop:
+  %cond = icmp ugt i32 %x, 0
+  br i1 %cond, label %if, label %else
+
+; CHECK-LABEL: if:
+; CHECK: store i32 %shr, i32* %p, align 4
+if:
+  %add = add i32 %x, 5
+  %shr = ashr i32 %add, 1
+  store i32 %shr, i32* %p, align 4
+  br label %then
+
+else:
+  br label %then
+
+then:
+  br label %loop
+}
+
+; A diamond with two triangles on the left and one on the right. This test is
+; to check that we have a unique loop preheader when we hoist the store (and so
+; don't fail an assertion).
+; CHECK-LABEL: @triangles_in_diamond
+define void @triangles_in_diamond(i32* %ptr) {
+; CHECK-LABEL: entry:
+; CHECK: store i32 0, i32* %ptr, align 4
+; CHECK: br label %loop
+entry:
+  br label %loop
+
+loop:
+  br i1 undef, label %left_triangle_1, label %right_triangle
+
+left_triangle_1:
+  br i1 undef, label %left_triangle_1_if, label %left_triangle_2
+
+left_triangle_1_if:
+  br label %left_triangle_2
+
+left_triangle_2:
+  br i1 undef, label %left_triangle_2_if, label %left_triangle_2_then
+
+left_triangle_2_if:
+  br label %left_triangle_2_then
+
+left_triangle_2_then:
+  br label %loop.end
+
+right_triangle:
+  br i1 undef, label %right_triangle.if, label %right_triangle.then
+
+right_triangle.if:
+  br label %right_triangle.then
+
+right_triangle.then:
+  br label %loop.end
+
+loop.end:
+  store i32 0, i32* %ptr, align 4
+  br label %loop
+}
+
+; %cmp dominates its used after being hoisted, but not after %brmerge is rehoisted
+; CHECK-LABEL: @rehoist
+define void @rehoist(i8* %this, i32 %x) {
+; CHECK-LABEL: entry:
+; CHECK: %sub = add nsw i32 %x, -1
+; CHECK: %fptr = bitcast i8* %this to void (i8*)*
+; CHECK: %cmp = icmp eq i32 0, %sub
+; CHECK: %brmerge = or i1 %cmp, true
+entry:
+  %sub = add nsw i32 %x, -1
+  br label %loop
+
+loop:
+  br i1 undef, label %if1, label %else1
+
+if1:
+  %fptr = bitcast i8* %this to void (i8*)*
+  call void %fptr(i8* %this)
+  br label %then1
+
+else1:
+  br label %then1
+
+then1:
+  %cmp = icmp eq i32 0, %sub
+  br i1 %cmp, label %end, label %else2
+
+else2:
+  %brmerge = or i1 %cmp, true
+  br i1 %brmerge, label %if3, label %end
+
+if3:
+  br label %end
+
+end:
+  br label %loop
+}
+
+; A test case that uses empty blocks in a way that can cause control flow
+; hoisting to get confused.
+; CHECK-LABEL: @empty_blocks_multiple_conditional_branches
+define void @empty_blocks_multiple_conditional_branches(float %arg, float* %ptr) {
+; CHECK-LABEL: entry
+; CHECK: %div1 = fmul float %arg, 4.000000e+00
+; CHECK: %div2 = fmul float %arg, 2.000000e+00
+entry:
+  br label %loop
+
+; The exact path to the phi isn't checked here, because it depends on whether
+; cond2 or cond3 is hoisted first
+; CHECK: %phi = phi float [ 0.000000e+00, %{{.*}} ], [ %div1, %{{.*}} ]
+; CHECK: br label %loop
+
+loop:
+  br i1 undef, label %backedge2, label %cond1
+
+cond1:
+  br i1 undef, label %cond1.if, label %cond1.else
+
+cond1.else:
+  br label %cond3
+
+cond1.if:
+  br label %cond1.if.next
+
+cond1.if.next:
+  br label %cond2
+
+cond2:
+  %div1 = fmul float %arg, 4.000000e+00
+  br i1 undef, label %cond2.if, label %cond2.then
+
+cond2.if:
+  br label %cond2.then
+
+cond2.then:
+  %phi = phi float [ 0.000000e+00, %cond2 ], [ %div1, %cond2.if ]
+  store float %phi, float* %ptr
+  br label %backedge2
+
+cond3:
+  br i1 undef, label %cond3.then, label %cond3.if
+
+cond3.if:
+  %div2 = fmul float %arg, 2.000000e+00
+  store float %div2, float* %ptr
+  br label %cond3.then
+
+cond3.then:
+  br label %loop
+
+backedge2:
+  br label %loop
+}
Index: test/Transforms/LoopVectorize/invariant-store-vectorization.ll
===================================================================
--- test/Transforms/LoopVectorize/invariant-store-vectorization.ll
+++ test/Transforms/LoopVectorize/invariant-store-vectorization.ll
@@ -199,10 +199,14 @@
 ; invariant val stored to invariant address predicated on invariant condition
 ; This is not treated as a predicated store since the block the store belongs to
 ; is the latch block (which doesn't need to be predicated).
-; TODO: We should vectorize this loop once we relax the check for
-; variant/invariant values being stored to invariant address.
+; This can be vectorized as LICM will hoist storeval.
 ; CHECK-LABEL: inv_val_store_to_inv_address_conditional_inv
-; CHECK-NOT: <4 x
+; CHECK-LABEL: vector.body:
+; CHECK:         store <4 x i32>
+; CHECK:         store i32 %storeval, i32* %a
+; CHECK-NEXT:    %index.next = add i64 %index, 4
+; CHECK-NEXT:    icmp eq i64 %index.next, %n.vec
+; CHECK-NEXT:    br i1
 define void @inv_val_store_to_inv_address_conditional_inv(i32* %a, i64 %n, i32* %b, i32 %k) {
 entry:
   %ntrunc = trunc i64 %n to i32