diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -216,6 +216,9 @@ Value *getOrCreateVectorValues(Value *V, unsigned Part) override; Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance) override; + + void setVectorValue(Value *Key, unsigned Part, + Value *Vector) override; }; /// A builder used to construct the current plan. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -406,9 +406,6 @@ /// Return the pre-header block of the new loop. BasicBlock *createVectorizedLoopSkeleton(); - /// Widen a single instruction within the innermost loop. - void widenInstruction(Instruction &I); - /// Fix the vectorized code, taking care of header phi's, live-outs, and more. void fixVectorizedLoop(); @@ -4225,213 +4222,6 @@ return !CInt || CInt->isZero(); } -void InnerLoopVectorizer::widenInstruction(Instruction &I) { - switch (I.getOpcode()) { - case Instruction::Br: - case Instruction::PHI: - case Instruction::GetElementPtr: - llvm_unreachable("This instruction is handled by a different recipe."); - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::SRem: - case Instruction::URem: - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::FNeg: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::FDiv: - case Instruction::FRem: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: { - // Just widen unops and binops. 
- setDebugLocFromInst(Builder, &I); - - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector Ops; - for (Value *Op : I.operands()) - Ops.push_back(getOrCreateVectorValue(Op, Part)); - - Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops); - - if (auto *VecOp = dyn_cast(V)) - VecOp->copyIRFlags(&I); - - // Use this vector value for all users of the original instruction. - VectorLoopValueMap.setVectorValue(&I, Part, V); - addMetadata(V, &I); - } - - break; - } - case Instruction::Select: { - // Widen selects. - // If the selector is loop invariant we can create a select - // instruction with a scalar condition. Otherwise, use vector-select. - auto *SE = PSE.getSE(); - bool InvariantCond = - SE->isLoopInvariant(PSE.getSCEV(I.getOperand(0)), OrigLoop); - setDebugLocFromInst(Builder, &I); - - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - - auto *ScalarCond = getOrCreateScalarValue(I.getOperand(0), {0, 0}); - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *Cond = getOrCreateVectorValue(I.getOperand(0), Part); - Value *Op0 = getOrCreateVectorValue(I.getOperand(1), Part); - Value *Op1 = getOrCreateVectorValue(I.getOperand(2), Part); - Value *Sel = - Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, Op0, Op1); - VectorLoopValueMap.setVectorValue(&I, Part, Sel); - addMetadata(Sel, &I); - } - - break; - } - - case Instruction::ICmp: - case Instruction::FCmp: { - // Widen compares. Generate vector compares. - bool FCmp = (I.getOpcode() == Instruction::FCmp); - auto *Cmp = cast(&I); - setDebugLocFromInst(Builder, Cmp); - for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(Cmp->getOperand(0), Part); - Value *B = getOrCreateVectorValue(Cmp->getOperand(1), Part); - Value *C = nullptr; - if (FCmp) { - // Propagate fast math flags. 
- IRBuilder<>::FastMathFlagGuard FMFG(Builder); - Builder.setFastMathFlags(Cmp->getFastMathFlags()); - C = Builder.CreateFCmp(Cmp->getPredicate(), A, B); - } else { - C = Builder.CreateICmp(Cmp->getPredicate(), A, B); - } - VectorLoopValueMap.setVectorValue(&I, Part, C); - addMetadata(C, &I); - } - - break; - } - - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::FPExt: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::SIToFP: - case Instruction::UIToFP: - case Instruction::Trunc: - case Instruction::FPTrunc: - case Instruction::BitCast: { - auto *CI = cast(&I); - setDebugLocFromInst(Builder, CI); - - /// Vectorize casts. - Type *DestTy = - (VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF); - - for (unsigned Part = 0; Part < UF; ++Part) { - Value *A = getOrCreateVectorValue(CI->getOperand(0), Part); - Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy); - VectorLoopValueMap.setVectorValue(&I, Part, Cast); - addMetadata(Cast, &I); - } - break; - } - - case Instruction::Call: { - // Ignore dbg intrinsics. - if (isa(I)) - break; - setDebugLocFromInst(Builder, &I); - - Module *M = I.getParent()->getParent()->getParent(); - auto *CI = cast(&I); - - SmallVector Tys; - for (Value *ArgOperand : CI->arg_operands()) - Tys.push_back(ToVectorTy(ArgOperand->getType(), VF)); - - Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); - - // The flag shows whether we use Intrinsic or a usual Call for vectorized - // version of the instruction. - // Is it beneficial to perform intrinsic call compared to lib call? 
- bool NeedToScalarize = false; - unsigned CallCost = Cost->getVectorCallCost(CI, VF, NeedToScalarize); - bool UseVectorIntrinsic = - ID && Cost->getVectorIntrinsicCost(CI, VF) <= CallCost; - assert((UseVectorIntrinsic || !NeedToScalarize) && - "Instruction should be scalarized elsewhere."); - - for (unsigned Part = 0; Part < UF; ++Part) { - SmallVector Args; - for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { - Value *Arg = CI->getArgOperand(i); - // Some intrinsics have a scalar argument - don't replace it with a - // vector. - if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) - Arg = getOrCreateVectorValue(CI->getArgOperand(i), Part); - Args.push_back(Arg); - } - - Function *VectorF; - if (UseVectorIntrinsic) { - // Use vector version of the intrinsic. - Type *TysForDecl[] = {CI->getType()}; - if (VF > 1) - TysForDecl[0] = VectorType::get(CI->getType()->getScalarType(), VF); - VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); - } else { - // Use vector version of the function call. - const VFShape Shape = - VFShape::get(*CI, {VF, false} /*EC*/, false /*HasGlobalPred*/); -#ifndef NDEBUG - const SmallVector Infos = VFDatabase::getMappings(*CI); - assert(std::find_if(Infos.begin(), Infos.end(), - [&Shape](const VFInfo &Info) { - return Info.Shape == Shape; - }) != Infos.end() && - "Vector function shape is missing from the database."); -#endif - VectorF = VFDatabase(*CI).getVectorizedFunction(Shape); - } - assert(VectorF && "Can't create vector function."); - - SmallVector OpBundles; - CI->getOperandBundlesAsDefs(OpBundles); - CallInst *V = Builder.CreateCall(VectorF, Args, OpBundles); - - if (isa(V)) - V->copyFastMathFlags(CI); - - VectorLoopValueMap.setVectorValue(&I, Part, V); - addMetadata(V, &I); - } - - break; - } - - default: - // This instruction is not vectorized by simple widening. - LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); - llvm_unreachable("Unhandled instruction!"); - } // end of switch. 
-} - void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { // We should not collect Scalars more than once per VF. Right now, this // function is called from collectUniformsAndScalars(), which already does @@ -6982,7 +6772,12 @@ return false; // Success: widen this instruction. - VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I); + VPlan &Plan = *VPBB->getPlan(); + // Create VPValue operands. + auto VPValues = map_range( + I->operands(), [&Plan](Value *Op) { return Plan.getOrAddVPValue(Op); }); + SmallVector Values(VPValues.begin(), VPValues.end()); + VPWidenRecipe *WidenRecipe = new VPWidenRecipe(I, Values); setRecipe(I, WidenRecipe); VPBB->appendRecipe(WidenRecipe); return true; @@ -7365,8 +7160,17 @@ ILV->addMetadata(To, From); } -void VPTransformState::setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr) { - ILV->setDebugLocFromInst(B, Ptr); +void VPTransformState::setDebugLocFromInst(const Value *Ptr) { + ILV->setDebugLocFromInst(Builder, Ptr); +} + +unsigned VPTransformState::getVectorCallCost(CallInst *CI, unsigned VF, + bool &NeedToScalarize) { + return ILV->Cost->getVectorCallCost(CI, VF, NeedToScalarize); +} + +unsigned VPTransformState::getVectorIntrinsicCost(CallInst *CI, unsigned VF) { + return ILV->Cost->getVectorIntrinsicCost(CI, VF); } void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent, @@ -7389,7 +7193,222 @@ } void VPWidenRecipe::execute(VPTransformState &State) { - State.ILV->widenInstruction(*Ingredient); + Instruction &I = *Ingredient; + auto GetVectorOps = [&State](ArrayRef Ops, unsigned Part) { + SmallVector VecOps; + for (VPValue *Op : Ops) + VecOps.push_back(State.get(Op, Part)); + return VecOps; + }; + switch (I.getOpcode()) { + case Instruction::Br: + case Instruction::PHI: + case Instruction::GetElementPtr: + llvm_unreachable("This instruction is handled by a different recipe."); + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::URem: + case Instruction::Add: + 
case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::FNeg: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + // Just widen unops and binops. + State.setDebugLocFromInst(&I); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + Value *V = State.Builder.CreateNAryOp( + I.getOpcode(), GetVectorOps(User.getOperandsRef(), Part)); + + if (auto *VecOp = dyn_cast(V)) + VecOp->copyIRFlags(&I); + + // Use this vector value for all users of the original instruction. + State.Callback.setVectorValue(&I, Part, V); + State.addMetadata(V, &I); + } + + break; + } + case Instruction::Select: { + // Widen selects. + // If the selector is loop invariant we can create a select + // instruction with a scalar condition. Otherwise, use vector-select. + auto *SE = State.PSE.getSE(); + bool InvariantCond = SE->isLoopInvariant(State.PSE.getSCEV(I.getOperand(0)), + State.OriginalLoop); + State.setDebugLocFromInst(&I); + + // The condition can be loop invariant but still defined inside the + // loop. This means that we can't just use the original 'cond' value. + // We have to take the 'vectorized' value and pick the first lane. + // Instcombine will make this a no-op. + + auto *ScalarCond = + State.Callback.getOrCreateScalarValue(I.getOperand(0), {0, 0}); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + auto VectorOps = GetVectorOps(User.getOperandsRef(), Part); + Value *Cond = VectorOps[0]; + Value *Op0 = VectorOps[1]; + Value *Op1 = VectorOps[2]; + Value *Sel = State.Builder.CreateSelect(InvariantCond ? ScalarCond : Cond, + Op0, Op1); + State.Callback.setVectorValue(&I, Part, Sel); + State.addMetadata(Sel, &I); + } + + break; + } + + case Instruction::ICmp: + case Instruction::FCmp: { + // Widen compares. Generate vector compares. 
+ bool FCmp = (I.getOpcode() == Instruction::FCmp); + auto *Cmp = cast(&I); + State.setDebugLocFromInst(Cmp); + for (unsigned Part = 0; Part < State.UF; ++Part) { + auto VectorOps = GetVectorOps(User.getOperandsRef(), Part); + Value *A = VectorOps[0]; + Value *B = VectorOps[1]; + Value *C = nullptr; + if (FCmp) { + // Propagate fast math flags. + IRBuilder<>::FastMathFlagGuard FMFG(State.Builder); + State.Builder.setFastMathFlags(Cmp->getFastMathFlags()); + C = State.Builder.CreateFCmp(Cmp->getPredicate(), A, B); + } else { + C = State.Builder.CreateICmp(Cmp->getPredicate(), A, B); + } + State.Callback.setVectorValue(&I, Part, C); + State.addMetadata(C, &I); + } + + break; + } + + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: { + auto *CI = cast(&I); + State.setDebugLocFromInst(CI); + + /// Vectorize casts. + Type *DestTy = (State.VF == 1) ? CI->getType() + : VectorType::get(CI->getType(), State.VF); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + auto VectorOps = GetVectorOps(User.getOperandsRef(), Part); + Value *A = VectorOps[0]; + Value *Cast = State.Builder.CreateCast(CI->getOpcode(), A, DestTy); + State.Callback.setVectorValue(&I, Part, Cast); + State.addMetadata(Cast, &I); + } + break; + } + + case Instruction::Call: { + // Ignore dbg intrinsics. 
+ if (isa(I)) + break; + State.setDebugLocFromInst(&I); + + Module *M = I.getParent()->getParent()->getParent(); + auto *CI = cast(&I); + + SmallVector Tys; + for (Value *ArgOperand : CI->arg_operands()) + Tys.push_back(ToVectorTy(ArgOperand->getType(), State.VF)); + + Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, State.TLI); + + // The flag shows whether we use Intrinsic or a usual Call for vectorized + // version of the instruction. + // Is it beneficial to perform intrinsic call compared to lib call? + bool NeedToScalarize = false; + unsigned CallCost = State.getVectorCallCost(CI, State.VF, NeedToScalarize); + bool UseVectorIntrinsic = + ID && State.getVectorIntrinsicCost(CI, State.VF) <= CallCost; + assert((UseVectorIntrinsic || !NeedToScalarize) && + "Instruction should be scalarized elsewhere."); + + for (unsigned Part = 0; Part < State.UF; ++Part) { + SmallVector Args; + // Need to use CI->getNumArgOperands, to skip the called function and + // operand bundles. + for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + VPValue *Arg = User.getOperand(i); + // Some intrinsics have a scalar argument - don't replace it with a + // vector. + Value *ArgVal = Arg->getUnderlyingValue(); + if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) + ArgVal = State.get(Arg, Part); + Args.push_back(ArgVal); + } + + Function *VectorF; + if (UseVectorIntrinsic) { + // Use vector version of the intrinsic. + Type *TysForDecl[] = {CI->getType()}; + if (State.VF > 1) + TysForDecl[0] = + VectorType::get(CI->getType()->getScalarType(), State.VF); + VectorF = Intrinsic::getDeclaration(M, ID, TysForDecl); + } else { + // Use vector version of the function call. 
+ const VFShape Shape = VFShape::get(*CI, /*EC=*/{State.VF, false}, + /*HasGlobalPred=*/false); +#ifndef NDEBUG + const SmallVector Infos = VFDatabase::getMappings(*CI); + assert(std::find_if(Infos.begin(), Infos.end(), + [&Shape](const VFInfo &Info) { + return Info.Shape == Shape; + }) != Infos.end() && + "Vector function shape is missing from the database."); +#endif + VectorF = VFDatabase(*CI).getVectorizedFunction(Shape); + } + assert(VectorF && "Can't create vector function."); + + SmallVector OpBundles; + CI->getOperandBundlesAsDefs(OpBundles); + CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); + + if (isa(V)) + V->copyFastMathFlags(CI); + + State.Callback.setVectorValue(&I, Part, V); + State.addMetadata(V, &I); + } + + break; + } + + default: + // This instruction is not vectorized by simple widening. + LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I); + llvm_unreachable("Unhandled instruction!"); + } // } void VPWidenGEPRecipe::execute(VPTransformState &State) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -230,6 +230,7 @@ virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0; virtual Value *getOrCreateScalarValue(Value *V, const VPIteration &Instance) = 0; + virtual void setVectorValue(Value *Key, unsigned Part, Value *Vector) = 0; }; /// VPTransformState holds information passed down when "executing" a VPlan, @@ -293,7 +294,7 @@ void addMetadata(ArrayRef To, Instruction *From); - void setDebugLocFromInst(IRBuilder<> &B, const Value *Ptr); + void setDebugLocFromInst(const Value *Ptr); unsigned getVectorCallCost(CallInst *CI, unsigned VF, bool &NeedToScalarize); @@ -784,8 +785,11 @@ /// Hold the ingredients by pointing to their original BasicBlock location. 
Instruction *Ingredient; + VPUser User; + public: - VPWidenRecipe(Instruction *I) : VPRecipeBase(VPWidenSC), Ingredient(I) {} + VPWidenRecipe(Instruction *I, ArrayRef Operands) + : VPRecipeBase(VPWidenSC), Ingredient(I), User(Operands) {} ~VPWidenRecipe() override = default; diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -71,8 +71,14 @@ NewRecipe = new VPWidenPHIRecipe(Phi); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, OrigLoop); - } else - NewRecipe = new VPWidenRecipe(Inst); + } else { + // Create VPValue operands. + auto VPValues = map_range(Inst->operands(), [&Plan](Value *Op) { + return Plan.getOrAddVPValue(Op); + }); + SmallVector Values(VPValues.begin(), VPValues.end()); + NewRecipe = new VPWidenRecipe(Inst, Values); + } NewRecipe->insertBefore(Ingredient); Ingredient->eraseFromParent(); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -43,6 +43,7 @@ friend class VPBasicBlock; friend class VPInterleavedAccessInfo; friend class VPSlotTracker; + friend class VPWidenRecipe; private: const unsigned char SubclassID; ///< Subclass identifier (for isa/dyn_cast). @@ -182,6 +183,8 @@ const_operand_range operands() const { return const_operand_range(op_begin(), op_end()); } + + ArrayRef getOperandsRef() { return {Operands}; } }; class VPlan; class VPBasicBlock;