diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -522,7 +522,8 @@
   /// the corresponding type.
   void widenIntOrFpInduction(PHINode *IV, Value *Start, TruncInst *Trunc,
                              VPValue *Def, VPValue *CastDef, VPValue *StepDef,
-                             VPValue *PhiDef, VPTransformState &State);
+                             VPValue *PhiDef, VPTransformState &State,
+                             Value *PrimInd);
 
   /// Construct the vector value of a scalarized value \p V one lane at a time.
   void packScalarIntoVectorValue(VPValue *Def, const VPIteration &Instance,
@@ -583,9 +584,7 @@
                     Value *CountRoundDown, Value *EndValue,
                     BasicBlock *MiddleBlock);
 
-  /// Create a new induction variable inside L.
-  PHINode *createInductionVariable(Loop *L, Value *Start, Value *End,
-                                   Value *Step, Instruction *DL);
+  void createLatchTerminator(Loop *L);
 
   /// Handle all cross-iteration phis in the header.
   void fixCrossIterationPHIs(VPTransformState &State);
@@ -829,9 +828,6 @@
   /// A list of all bypass blocks. The first block is the entry of the loop.
   SmallVector<BasicBlock *, 4> LoopBypassBlocks;
 
-  /// The new Induction variable which was added to the new block.
-  PHINode *Induction = nullptr;
-
   /// The induction variable of the old basic block.
   PHINode *OldInduction = nullptr;
 
@@ -2400,7 +2396,8 @@
 
 void InnerLoopVectorizer::widenIntOrFpInduction(
     PHINode *IV, Value *Start, TruncInst *Trunc, VPValue *Def, VPValue *CastDef,
-    VPValue *StepDef, VPValue *PhiDef, VPTransformState &State) {
+    VPValue *StepDef, VPValue *PhiDef, VPTransformState &State,
+    Value *PrimInd) {
   assert((IV->getType()->isIntegerTy() || IV != OldInduction) &&
          "Primary induction variable must have an integer type");
 
@@ -2434,12 +2431,12 @@
   // induction variable and step. Otherwise, derive these values from the
   // induction descriptor.
   auto CreateScalarIV = [&](Value *&Step) -> Value * {
-    Value *ScalarIV = Induction;
+    Value *ScalarIV = PrimInd;
     if (IV != OldInduction) {
-      ScalarIV = IV->getType()->isIntegerTy()
-                     ? Builder.CreateSExtOrTrunc(Induction, IV->getType())
-                     : Builder.CreateCast(Instruction::SIToFP, Induction,
-                                          IV->getType());
+      ScalarIV =
+          IV->getType()->isIntegerTy()
+              ? Builder.CreateSExtOrTrunc(PrimInd, IV->getType())
+              : Builder.CreateCast(Instruction::SIToFP, PrimInd, IV->getType());
       ScalarIV = emitTransformedIndex(Builder, ScalarIV, PSE.getSE(), DL, ID);
       ScalarIV->setName("offset.idx");
     }
@@ -3080,9 +3077,7 @@
     PredicatedInstructions.push_back(Cloned);
 }
 
-PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
-                                                      Value *End, Value *Step,
-                                                      Instruction *DL) {
+void InnerLoopVectorizer::createLatchTerminator(Loop *L) {
   BasicBlock *Header = L->getHeader();
   BasicBlock *Latch = L->getLoopLatch();
   // As we're just creating this loop, it's possible no latch exists
@@ -3092,31 +3087,15 @@
 
   IRBuilder<> B(&*Header->getFirstInsertionPt());
   Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction);
-  setDebugLocFromInst(OldInst, &B);
-  auto *Induction = B.CreatePHI(Start->getType(), 2, "index");
 
   B.SetInsertPoint(Latch->getTerminator());
   setDebugLocFromInst(OldInst, &B);
 
-  // Create i+1 and fill the PHINode.
-  //
-  // If the tail is not folded, we know that End - Start >= Step (either
-  // statically or through the minimum iteration checks). We also know that both
-  // Start % Step == 0 and End % Step == 0. We exit the vector loop if %IV +
-  // %Step == %End. Hence we must exit the loop before %IV + %Step unsigned
-  // overflows and we can mark the induction increment as NUW.
-  Value *Next = B.CreateAdd(Induction, Step, "index.next",
-                            /*NUW=*/!Cost->foldTailByMasking(), /*NSW=*/false);
-  Induction->addIncoming(Start, L->getLoopPreheader());
-  Induction->addIncoming(Next, Latch);
   // Create the compare.
-  Value *ICmp = B.CreateICmpEQ(Next, End);
-  B.CreateCondBr(ICmp, L->getUniqueExitBlock(), Header);
+  B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header);
 
   // Now we have two terminators. Remove the old one from the block.
   Latch->getTerminator()->eraseFromParent();
-
-  return Induction;
 }
 
 Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) {
@@ -3736,17 +3715,13 @@
   //   - counts from zero, stepping by one
   //   - is the size of the widest induction variable type
   // then we create a new one.
   OldInduction = Legal->getPrimaryInduction();
-  Type *IdxTy = Legal->getWidestInductionType();
-  Value *StartIdx = ConstantInt::get(IdxTy, 0);
+
   // The loop step is equal to the vectorization factor (num of SIMD elements)
   // times the unroll factor (num of SIMD instructions).
   Builder.SetInsertPoint(&*Lp->getHeader()->getFirstInsertionPt());
-  Value *Step = createStepForVF(Builder, ConstantInt::get(IdxTy, UF), VF);
   Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
-  Induction =
-      createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
-                              getDebugLocFromInstOrOperands(OldInduction));
+  createLatchTerminator(Lp);
 
   // Emit phis for the new starting index of the scalar loop.
   createInductionResumeValues(Lp, CountRoundDown);
@@ -4716,6 +4690,10 @@
   InductionDescriptor II = Legal->getInductionVars().lookup(P);
   const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
 
+  auto *Plan = PhiR->getParent()->getPlan();
+  auto *IVR = Plan->getCanonicalIV();
+  PHINode *PrimInd = cast<PHINode>(State.get(IVR->getVPSingleValue(), 0));
+
   // FIXME: The newly created binary instructions should contain nsw/nuw flags,
   // which can be found from the original scalar operations.
   switch (II.getKind()) {
@@ -4731,7 +4709,7 @@
     if (Cost->isScalarAfterVectorization(P, State.VF)) {
       // This is the normalized GEP that starts counting at zero.
       Value *PtrInd =
-          Builder.CreateSExtOrTrunc(Induction, II.getStep()->getType());
+          Builder.CreateSExtOrTrunc(PrimInd, II.getStep()->getType());
       // Determine the number of scalars we need to generate for each unroll
       // iteration. If the instruction is uniform, we only need to generate the
       // first lane. Otherwise, we generate all VF values.
@@ -4788,7 +4766,7 @@
     Value *ScalarStartValue = II.getStartValue();
     Type *ScStValueType = ScalarStartValue->getType();
     PHINode *NewPointerPhi =
-        PHINode::Create(ScStValueType, 2, "pointer.phi", Induction);
+        PHINode::Create(ScStValueType, 2, "pointer.phi", PrimInd);
     NewPointerPhi->addIncoming(ScalarStartValue, LoopVectorPreHeader);
 
     // A pointer induction, performed by using a gep
@@ -8224,7 +8202,7 @@
   VPTransformState State{LI, DT, ILV.Builder, &ILV, &BestVPlan};
   State.CFG.PrevBB = ILV.createVectorizedLoopSkeleton();
   State.TripCount = ILV.getOrCreateTripCount(nullptr);
-  State.CanonicalIV = ILV.Induction;
+  State.VectorTripCount = ILV.getOrCreateVectorTripCount(nullptr);
 
   ILV.printDebugTracesAtStart();
 
@@ -8404,14 +8382,9 @@
 
   // Generate the induction variable.
   OldInduction = Legal->getPrimaryInduction();
-  Type *IdxTy = Legal->getWidestInductionType();
-  Value *StartIdx = ConstantInt::get(IdxTy, 0);
-  Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
   Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
   EPI.VectorTripCount = CountRoundDown;
-  Induction =
-      createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
-                              getDebugLocFromInstOrOperands(OldInduction));
+  createLatchTerminator(Lp);
 
   // Skip induction resume value creation here because they will be created in
   // the second pass. If we created them here, they wouldn't be used anyway,
@@ -8433,7 +8406,8 @@
 
 void EpilogueVectorizerMainLoop::printDebugTracesAtEnd() {
   DEBUG_WITH_TYPE(VerboseDebug, {
-    dbgs() << "intermediate fn:\n" << *Induction->getFunction() << "\n";
+    dbgs() << "intermediate fn:\n"
+           << *OrigLoop->getHeader()->getParent() << "\n";
   });
 }
 
@@ -8566,12 +8540,7 @@
 
   // Generate the induction variable.
   OldInduction = Legal->getPrimaryInduction();
-  Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
-  Constant *Step = ConstantInt::get(IdxTy, VF.getKnownMinValue() * UF);
-  Value *StartIdx = EPResumeVal;
-  Induction =
-      createInductionVariable(Lp, StartIdx, CountRoundDown, Step,
-                              getDebugLocFromInstOrOperands(OldInduction));
+  createLatchTerminator(Lp);
 
   // Generate induction resume values. These variables save the new starting
   // indexes for the scalar loop. They are used to test if there are any tail
@@ -8580,6 +8549,7 @@
   // check, then the resume value for the induction variable comes from
   // the trip count of the main vector loop, hence passing the AdditionalBypass
   // argument.
+  Value *CountRoundDown = getOrCreateVectorTripCount(Lp);
   createInductionResumeValues(Lp, CountRoundDown,
                               {VecEpilogueIterationCountCheck,
                                EPI.VectorTripCount} /* AdditionalBypass */);
@@ -8630,7 +8600,7 @@
 
 void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
   DEBUG_WITH_TYPE(VerboseDebug, {
-    dbgs() << "final fn:\n" << *Induction->getFunction() << "\n";
+    dbgs() << "final fn:\n" << *OrigLoop->getHeader()->getParent() << "\n";
   });
 }
 
@@ -9390,7 +9360,9 @@
   assert(isa<VPRegionBlock>(Plan->getEntry()) &&
          !Plan->getEntry()->getEntryBasicBlock()->empty() &&
          "entry block must be set to a non-empty VPBasicBlock");
-  cast<VPRegionBlock>(Plan->getEntry())->setExit(VPBB);
+  VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
+  TopRegion->setExit(VPBB);
+
   RecipeBuilder.fixHeaderPhis();
 
   // ---------------------------------------------------------------------------
@@ -9523,6 +9495,21 @@
       }
   }
 
+  // Add the canonical IV phi (starting at 0) at the top of the header block.
+  Type *IdxTy = Legal->getWidestInductionType();
+  auto *StartV = Plan->getOrAddVPValue(ConstantInt::get(IdxTy, 0));
+  auto *PrimaryInd = new VPCanonicalIVRecipe(StartV);
+  PrimaryInd->insertBefore(
+      &*TopRegion->getEntry()->getEntryBasicBlock()->begin());
+
+  // If the tail is not folded, the loop exits before IV + Step can wrap: NUW.
+  auto *InductionIncrement = new VPInstruction(
+      CM.foldTailByMasking() ? VPInstruction::InductionIncrement
+                             : VPInstruction::InductionIncrementNUW,
+      {PrimaryInd->getVPSingleValue()});
+  PrimaryInd->addOperand(InductionIncrement->getVPSingleValue());
+  VPBB->appendRecipe(InductionIncrement);
+
   // From this point onwards, VPlan-to-VPlan transformations may change the plan
   // in ways that accessing values using original IR values is incorrect.
   Plan->disableValue2VPValue();
@@ -9579,6 +9566,25 @@
   VPlanTransforms::VPInstructionsToVPRecipes(OrigLoop, Plan,
                                              Legal->getInductionVars(),
                                              DeadInstructions, *PSE.getSE());
+
+  Type *IdxTy = Legal->getWidestInductionType();
+  Value *StartIdx = ConstantInt::get(IdxTy, 0);
+  auto *StartV = Plan->getOrAddVPValue(StartIdx);
+  // The phi goes at the top of the block following the region's entry block.
+  auto *PrimaryInd = new VPCanonicalIVRecipe(StartV);
+  VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
+  PrimaryInd->insertBefore(
+      &*cast<VPBasicBlock>(TopRegion->getEntry()->getSingleSuccessor())
+            ->begin());
+  // NOTE(review): NUW is unconditional here (no tail-folding check) - confirm.
+  auto *InductionIncrement = new VPInstruction(
+      VPInstruction::InductionIncrementNUW, {PrimaryInd->getVPSingleValue()});
+  PrimaryInd->addOperand(InductionIncrement->getVPSingleValue());
+  // Place the increment before the last recipe of the exit's predecessor.
+  VPBasicBlock *OuterLatch =
+      cast<VPBasicBlock>(TopRegion->getExit()->getSinglePredecessor());
+  OuterLatch->insert(InductionIncrement, std::prev(OuterLatch->end()));
+
   return Plan;
 }
 
@@ -9718,9 +9724,11 @@
 
 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Int or FP induction being replicated.");
+  auto *PrimInd = State.get(
+      getParent()->getPlan()->getCanonicalIV()->getVPSingleValue(), 0);
   State.ILV->widenIntOrFpInduction(
       IV, getStartValue()->getLiveInIRValue(), getTruncInst(), getVPValue(0),
-      getCastValue(), getStepValue(), getPhiValue(), State);
+      getCastValue(), getStepValue(), getPhiValue(), State, PrimInd);
 }
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -351,6 +351,8 @@
 
   /// Pointer to the VPlan code is generated for.
   VPlan *Plan;
+  /// Vector trip count; bound of the vector loop's latch exit compare.
+  Value *VectorTripCount = nullptr;
 };
 
 /// VPUsers instance used by VPBlockBase to manage CondBit and the block
@@ -789,6 +791,8 @@
     SLPLoad,
     SLPStore,
     ActiveLaneMask,
+    InductionIncrement,
+    InductionIncrementNUW,
   };
 
 private:
@@ -1607,6 +1611,30 @@
 #endif
 };
 
+class VPCanonicalIVRecipe : public VPRecipeBase {
+public:
+  VPCanonicalIVRecipe(VPValue *StartV)
+      : VPRecipeBase(VPCanonicalIVSC, {StartV}) {
+    new VPValue(nullptr, this); // Single defined value (getVPSingleValue).
+  }
+
+  ~VPCanonicalIVRecipe() override = default;
+
+  /// Method to support type inquiry through isa, cast, and dyn_cast.
+  static inline bool classof(const VPDef *D) {
+    return D->getVPDefID() == VPCanonicalIVSC;
+  }
+
+  /// Create the "index" phi with its preheader start value (part 0 only).
+  void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Print the recipe.
+  void print(raw_ostream &O, const Twine &Indent,
+             VPSlotTracker &SlotTracker) const override;
+#endif
+};
+
 /// A Recipe for widening the canonical induction variable of the vector loop.
 class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
 public:
@@ -2277,6 +2305,14 @@
     return cast<VPRegionBlock>(getEntry());
   }
 
+  /// Return the canonical IV recipe, i.e. the first phi of the loop header.
+  VPCanonicalIVRecipe *getCanonicalIV() {
+    VPBasicBlock *HeaderVPBB = getEntry()->getEntryBasicBlock();
+    if (HeaderVPBB->empty()) // VPlan native path: header follows empty entry.
+      HeaderVPBB = cast<VPBasicBlock>(HeaderVPBB->getSingleSuccessor());
+    return cast<VPCanonicalIVRecipe>(&*HeaderVPBB->phis().begin());
+  }
+
 private:
   /// Add to the given dominator tree the header block and every new basic block
   /// that was created between it and the latch block, inclusive.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -60,6 +60,14 @@
 }
 #endif
 
+/// Return \p Step scaled by \p VF's known minimum (times vscale if scalable).
+static Value *createStepForVF(IRBuilder<> &B, Constant *Step, ElementCount VF) {
+  assert(isa<ConstantInt>(Step) && "Expected an integer step");
+  auto Scaled = cast<ConstantInt>(Step)->getSExtValue() * VF.getKnownMinValue();
+  Constant *StepVal = ConstantInt::get(Step->getType(), Scaled);
+  return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;
+}
+
 Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
                                 const ElementCount &VF) const {
   switch (LaneKind) {
@@ -660,6 +668,9 @@
   }
 
   switch (getOpcode()) {
+  case Instruction::PHI: {
+    break;
+  }
   case VPInstruction::Not: {
     Value *A = State.get(getOperand(0), Part);
     Value *V = Builder.CreateNot(A);
@@ -719,6 +730,20 @@
     }
     break;
   }
+  // Canonical IV increment; only part 0 emits the scalar add of the step.
+  case VPInstruction::InductionIncrement:
+  case VPInstruction::InductionIncrementNUW: {
+    if (Part == 0) {
+      bool IsNUW = getOpcode() == VPInstruction::InductionIncrementNUW;
+      auto *Phi = State.get(getOperand(0), Part);
+      Value *Step = createStepForVF(
+          Builder, ConstantInt::get(Phi->getType(), State.UF), State.VF);
+      Value *Next = Builder.CreateAdd(Phi, Step, "index.next", IsNUW, false);
+      State.set(getVPSingleValue(), Next, Part);
+    }
+
+    break;
+  }
   default:
     llvm_unreachable("Unsupported opcode for instruction");
   }
@@ -764,6 +789,12 @@
   case VPInstruction::FirstOrderRecurrenceSplice:
     O << "first-order splice";
     break;
+  case VPInstruction::InductionIncrement:
+    O << "induction increment";
+    break;
+  case VPInstruction::InductionIncrementNUW:
+    O << "induction increment nuw";
+    break;
   default:
     O << Instruction::getOpcodeName(getOpcode());
   }
@@ -823,6 +854,41 @@
   for (VPBlockBase *Block : depth_first(Entry))
     Block->execute(State);
 
+  // Setup branch terminator successors for VPBBs in VPBBsToFix based on
+  // VPBB's successors.
+  for (auto VPBB : State->CFG.VPBBsToFix) {
+    assert(EnableVPlanNativePath &&
+           "Unexpected VPBBsToFix in non VPlan-native path");
+    BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
+    assert(BB && "Unexpected null basic block for VPBB");
+
+    unsigned Idx = 0;
+    auto *BBTerminator = BB->getTerminator();
+
+    for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
+      VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
+      BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
+      ++Idx;
+    }
+  }
+
+  // 3. Merge the temporary latch created with the last basic-block filled.
+  BasicBlock *LastBB = State->CFG.PrevBB;
+  // Connect LastBB to VectorLatchBB to facilitate their merge.
+  assert((EnableVPlanNativePath ||
+          isa<UnreachableInst>(LastBB->getTerminator())) &&
+         "Expected InnerLoop VPlan CFG to terminate with unreachable");
+  assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
+         "Expected VPlan CFG to terminate with branch in NativePath");
+  LastBB->getTerminator()->eraseFromParent();
+  BranchInst::Create(VectorLatchBB, LastBB);
+
+  // Merge LastBB with Latch.
+  bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
+  (void)Merged;
+  assert(Merged && "Could not merge last basic block with latch.");
+  VectorLatchBB = LastBB;
+
   // Fix the latch value of reduction and first-order recurrences phis in the
   // vector loop.
   VPBasicBlock *Header = Entry->getEntryBasicBlock();
@@ -831,6 +897,21 @@
     Header = cast<VPBasicBlock>(Header->getSingleSuccessor());
   }
   for (VPRecipeBase &R : Header->phis()) {
+    if (auto *Ind = dyn_cast<VPCanonicalIVRecipe>(&R)) {
+      auto *BackedgeValue = cast<VPInstruction>(Ind->getOperand(1)->getDef());
+      auto *P = cast<PHINode>(State->get(Ind->getVPSingleValue(), 0));
+      BasicBlock *LatchBB = State->CFG.VPBB2IRBB[BackedgeValue->getParent()];
+      P->addIncoming(State->get(BackedgeValue, 0), LatchBB);
+      auto *Next = cast<Instruction>(P->getIncomingValueForBlock(LatchBB));
+      auto *TermBr = cast<BranchInst>(LatchBB->getTerminator());
+      State->Builder.SetInsertPoint(TermBr);
+      auto *ICmp = cast<Instruction>(
+          State->Builder.CreateICmpEQ(Next, State->VectorTripCount));
+      TermBr->setCondition(ICmp);
+      Next->moveBefore(ICmp);
+      continue;
+    }
+
     if (auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
       if (!State->hasAnyVectorValue(IV->getStepValue()))
         continue;
@@ -843,8 +924,8 @@
       auto *LoopVectorLatch =
           State->LI->getLoopFor(State->CFG.PrevBB)->getLoopLatch();
       auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
-      auto *ICmp = cast<Instruction>(Br->getCondition());
-      LastInduction->moveBefore(ICmp);
+      auto *Cmp = cast<Instruction>(Br->getCondition());
+      LastInduction->moveBefore(Cmp);
       LastInduction->setName("vec.ind.next");
 
       VecInd->addIncoming(LastInduction, LoopVectorLatch);
@@ -868,41 +949,6 @@
     }
   }
 
-  // Setup branch terminator successors for VPBBs in VPBBsToFix based on
-  // VPBB's successors.
-  for (auto VPBB : State->CFG.VPBBsToFix) {
-    assert(EnableVPlanNativePath &&
-           "Unexpected VPBBsToFix in non VPlan-native path");
-    BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
-    assert(BB && "Unexpected null basic block for VPBB");
-
-    unsigned Idx = 0;
-    auto *BBTerminator = BB->getTerminator();
-
-    for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
-      VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
-      BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
-      ++Idx;
-    }
-  }
-
-  // 3. Merge the temporary latch created with the last basic-block filled.
-  BasicBlock *LastBB = State->CFG.PrevBB;
-  // Connect LastBB to VectorLatchBB to facilitate their merge.
-  assert((EnableVPlanNativePath ||
-          isa<UnreachableInst>(LastBB->getTerminator())) &&
-         "Expected InnerLoop VPlan CFG to terminate with unreachable");
-  assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
-         "Expected VPlan CFG to terminate with branch in NativePath");
-  LastBB->getTerminator()->eraseFromParent();
-  BranchInst::Create(VectorLatchBB, LastBB);
-
-  // Merge LastBB with Latch.
-  bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
-  (void)Merged;
-  assert(Merged && "Could not merge last basic block with latch.");
-  VectorLatchBB = LastBB;
-
   // We do not attempt to preserve DT for outer loop vectorization currently.
   if (!EnableVPlanNativePath)
     updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
@@ -1274,8 +1320,27 @@
 }
 #endif
 
+void VPCanonicalIVRecipe::execute(VPTransformState &State) {
+  Value *StartV = getOperand(0)->getLiveInIRValue();
+  Instruction *InsertPt = &*State.CFG.PrevBB->getFirstInsertionPt();
+  auto *Phi = PHINode::Create(StartV->getType(), 2, "index", InsertPt);
+  Phi->addIncoming(StartV, State.CFG.VectorPreHeader); // Backedge added later.
+  State.set(getVPSingleValue(), Phi, 0);
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
+                                VPSlotTracker &SlotTracker) const {
+  O << Indent << "EMIT "; // Output: EMIT <result> = CANONICAL-INDUCTION
+  getVPSingleValue()->printAsOperand(O, SlotTracker);
+  O << " = CANONICAL-INDUCTION";
+}
+#endif
+
 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
-  Value *CanonicalIV = State.CanonicalIV;
+
+  Value *CanonicalIV = State.get(
+      getParent()->getPlan()->getCanonicalIV()->getVPSingleValue(), 0);
   Type *STy = CanonicalIV->getType();
   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   ElementCount VF = State.VF;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -101,6 +101,7 @@
 
     // Phi-like VPValues. Need to be kept together.
     VPVBlendSC,
+    VPVCanonicalIVSC,
     VPVFirstOrderRecurrencePHISC,
     VPVWidenPHISC,
     VPVWidenCanonicalIVSC,
@@ -332,6 +333,7 @@
 
     // Phi-like recipes. Need to be kept together.
     VPBlendSC,
+    VPCanonicalIVSC,
     VPFirstOrderRecurrencePHISC,
     VPWidenPHISC,
     VPWidenCanonicalIVSC,
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll
@@ -13,6 +13,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
@@ -61,6 +62,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.1.split:
 ; CHECK-NEXT:   WIDEN ir<%add> = add ir<%conv>, vp<[[PRED2]]>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -92,6 +94,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
@@ -125,6 +128,7 @@
 ; CHECK-NEXT: Successor(s): loop.1
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.1:
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -156,6 +160,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   WIDEN-REDUCTION-PHI ir<%and.red> = phi ir<1234>, ir<%and.red.next>
@@ -187,6 +192,7 @@
 ; CHECK-NEXT:   WIDEN ir<%add> = add vp<[[PRED]]>, ir<%recur.next>
 ; CHECK-NEXT:   WIDEN ir<%and.red.next> = and ir<%and.red>, ir<%add>
 ; CHECK-NEXT:   EMIT vp<[[SEL:%.+]]> = select vp<[[MASK]]> ir<%and.red.next> ir<%and.red>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -221,6 +227,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%0> = phi ir<0>, ir<%conv>
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
@@ -275,6 +282,7 @@
 ; CHECK-NEXT:   WIDEN ir<%add.1> = add ir<%conv>, vp<[[PRED1]]>
 ; CHECK-NEXT:   WIDEN ir<%conv.lv.2> = sext vp<[[PRED2]]>
 ; CHECK-NEXT:   WIDEN ir<%add> = add ir<%add.1>, ir<%conv.lv.2>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -310,6 +318,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%recur> = phi ir<0>, ir<%recur.next>
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
@@ -361,6 +370,7 @@
 ; CHECK-NEXT: Successor(s): loop.2
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.2:
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
--- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
+++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll
@@ -39,9 +39,11 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   WIDEN ir<%cond0> = icmp ir<%iv>, ir<13>
 ; CHECK-NEXT:   WIDEN-SELECT ir<%s> = select ir<%cond0>, ir<10>, ir<20>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successor
 ; CHECK-NEXT: }
 define void @test() {
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-dot-printing.ll
@@ -12,12 +12,14 @@
 ; CHECK-NEXT:    label="\<x1\> vector loop"
 ; CHECK-NEXT:    N1 [label =
 ; CHECK-NEXT:    "for.body:\l" +
+; CHECK-NEXT:    "  EMIT vp\<%0\> = CANONICAL-INDUCTION\l" +
 ; CHECK-NEXT:    "  WIDEN-INDUCTION %iv = phi %iv.next, 0\l" +
 ; CHECK-NEXT:    "  CLONE ir\<%arrayidx\> = getelementptr ir\<%y\>, ir\<%iv\>\l" +
 ; CHECK-NEXT:    "  WIDEN ir\<%lv\> = load ir\<%arrayidx\>\l" +
 ; CHECK-NEXT:    "  WIDEN-CALL ir\<%call\> = call @llvm.sqrt.f32(ir\<%lv\>)\l" +
 ; CHECK-NEXT:    "  CLONE ir\<%arrayidx2\> = getelementptr ir\<%x\>, ir\<%iv\>\l" +
 ; CHECK-NEXT:    "  WIDEN store ir\<%arrayidx2\>, ir\<%call\>\l" +
+; CHECK-NEXT:    "  EMIT vp\<%8\> = induction increment nuw vp\<%0\>\l" +
 ; CHECK-NEXT:    "No successors\l"
 ; CHECK-NEXT:  ]
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll
@@ -11,12 +11,14 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi %iv.next, 0
 ; CHECK-NEXT:   CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>
 ; CHECK-NEXT:   WIDEN ir<%lv> = load ir<%arrayidx>
 ; CHECK-NEXT:   WIDEN-CALL ir<%call> = call @llvm.sqrt.f32(ir<%lv>)
 ; CHECK-NEXT:   CLONE ir<%arrayidx2> = getelementptr ir<%x>, ir<%iv>
 ; CHECK-NEXT:   WIDEN store ir<%arrayidx2>, ir<%call>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -46,6 +48,7 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi %iv.next, 0
 ; CHECK-NEXT:   WIDEN-GEP Inv[Var] ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>
 ; CHECK-NEXT:   WIDEN ir<%lv> = load ir<%arrayidx>
@@ -54,6 +57,7 @@
 ; CHECK-NEXT:   WIDEN ir<%add> = fadd ir<%lv>, ir<%sel>
 ; CHECK-NEXT:   CLONE ir<%arrayidx2> = getelementptr ir<%x>, ir<%iv>
 ; CHECK-NEXT:   WIDEN store ir<%arrayidx2>, ir<%add>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -85,11 +89,13 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi %iv.next, 0
 ; CHECK-NEXT:   WIDEN-REDUCTION-PHI ir<%red> = phi ir<0.000000e+00>, ir<%red.next>
 ; CHECK-NEXT:   CLONE ir<%arrayidx> = getelementptr ir<%y>, ir<%iv>
 ; CHECK-NEXT:   WIDEN ir<%lv> = load ir<%arrayidx>
 ; CHECK-NEXT:   REDUCE ir<%red.next> = ir<%red> + reduce.fadd (ir<%lv>)
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -117,6 +123,7 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %i = phi 0, %i.next
 ; CHECK-NEXT:   WIDEN ir<%cmp> = icmp ir<%i>, ir<5>
 ; CHECK-NEXT: Successor(s): if.then
@@ -148,6 +155,7 @@
 ; CHECK-NEXT:   BLEND %d = ir<0>/vp<[[NOT]]> vp<[[PRED]]>/ir<%cmp>
 ; CHECK-NEXT:   CLONE ir<%idx> = getelementptr ir<%x>, ir<%i>
 ; CHECK-NEXT:   WIDEN store ir<%idx>, ir<%d>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
@@ -185,6 +193,7 @@
 ; CHECK:       VPlan 'Initial VPlan for VF={4},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT:  for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   CLONE ir<%gep.AB.0> = getelementptr ir<@AB>, ir<0>, ir<%iv>
 ; CHECK-NEXT:   INTERLEAVE-GROUP with factor 4 at %AB.0, ir<%gep.AB.0>
@@ -206,6 +215,7 @@
 ; CHECK-NEXT:     store ir<1> to index 1
 ; CHECK-NEXT:     store ir<2> to index 2
 ; CHECK-NEXT:     store ir<%AB.3> to index 3
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge-vf1.ll
@@ -10,6 +10,7 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={1},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: for.body:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %tmp0 = phi %tmp6, 0
 ; CHECK-NEXT:   WIDEN-INDUCTION %tmp1 = phi %tmp7, 0
 ; CHECK-NEXT:   CLONE ir<%tmp2> = getelementptr ir<%ptr>, ir<%tmp0>
@@ -40,6 +41,7 @@
 ; CHECK-NEXT: Successor(s): for.inc
 
 ; CHECK:      for.inc:
+; CHECK-NEXT:  EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ; CHECK-NEXT: No successors
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
--- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll
@@ -15,6 +15,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): loop.0
@@ -45,6 +46,7 @@
 ; CHECK:      loop.1:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -76,6 +78,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.load
@@ -119,6 +122,7 @@
 ; CHECK:       loop.1:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -150,6 +154,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
 ; CHECK-NEXT: Successor(s): pred.load
@@ -193,6 +198,7 @@
 ; CHECK:      loop.1:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -226,9 +232,10 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 21, %iv.next
-; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION
-; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[CAN_IV]]> vp<[[BTC]]>
+; CHECK-NEXT:   EMIT vp<[[WIDE_CAN_IV:%.+]]> = WIDEN-CANONICAL-INDUCTION
+; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule vp<[[WIDE_CAN_IV]]> vp<[[BTC]]>
 ; CHECK-NEXT:   CLONE ir<%gep.A.uniform> = getelementptr ir<%A>, ir<0>
 ; CHECK-NEXT: Successor(s): pred.load
 ; CHECK-EMPTY:
@@ -277,6 +284,7 @@
 ; CHECK-NEXT: Successor(s): loop.latch
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.latch:
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -311,6 +319,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   WIDEN ir<%c.1> = icmp ir<%iv>, ir<%j>
 ; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
@@ -367,6 +376,7 @@
 ; CHECK-NEXT: next.0.0:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -407,6 +417,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
 ; CHECK-NEXT:   WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j>
@@ -471,6 +482,7 @@
 ; CHECK-NEXT: next.1:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -518,6 +530,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   WIDEN ir<%mul> = mul ir<%iv>, ir<10>
 ; CHECK-NEXT:   WIDEN ir<%c.0> = icmp ir<%iv>, ir<%j>
@@ -580,6 +593,7 @@
 ; CHECK-NEXT: next.1:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -623,6 +637,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
 ; CHECK-NEXT:   REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
@@ -690,6 +705,7 @@
 ; CHECK-NEXT: latch:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -733,6 +749,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
 ; CHECK-NEXT:   REPLICATE ir<%gep.a> = getelementptr ir<@a>, ir<0>, ir<%iv>
@@ -766,6 +783,7 @@
 ; CHECK-NEXT: loop.2:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -795,6 +813,7 @@
 ; CHECK-EMPTY:
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   FIRST-ORDER-RECURRENCE-PHI ir<%for> = phi ir<0>, ir<%lv.a>
 ; CHECK-NEXT:   EMIT vp<[[MASK:%.+]]> = icmp ule ir<%iv> vp<[[BTC]]>
@@ -844,6 +863,7 @@
 ; CHECK-NEXT: loop.2:
 ; CHECK-NEXT:   CLONE ir<%large> = icmp ir<%iv>, ir<8>
 ; CHECK-NEXT:   CLONE ir<%exitcond> = icmp ir<%iv>, ir<%k>
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -872,6 +892,7 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop.header:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT: Successor(s): loop.then
 ; CHECK-EMPTY:
@@ -906,6 +927,7 @@
 ; CHECK-NEXT: Successor(s): loop.latch
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.latch:
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;
@@ -939,6 +961,7 @@
 ; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
 ; CHECK-NEXT: <x1> vector loop: {
 ; CHECK-NEXT: loop.header:
+; CHECK-NEXT:   EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
 ; CHECK-NEXT:   WIDEN-INDUCTION %iv = phi 0, %iv.next
 ; CHECK-NEXT:   CLONE ir<%gep> = getelementptr ir<%addr>, ir<%iv>
 ; CHECK-NEXT: Successor(s): loop.body
@@ -972,6 +995,7 @@
 ; CHECK-NEXT: Successor(s): loop.latch
 ; CHECK-EMPTY:
 ; CHECK-NEXT: loop.latch:
+; CHECK-NEXT:   EMIT vp<{{.+}}> = induction increment nuw vp<[[CAN_IV]]>
 ; CHECK-NEXT: No successors
 ; CHECK-NEXT: }
 ;