Index: llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -214,6 +214,8 @@
     VPCallbackILV(InnerLoopVectorizer &ILV) : ILV(ILV) {}
 
     Value *getOrCreateVectorValues(Value *V, unsigned Part) override;
+    Value *getOrCreateScalarValue(Value *V,
+                                  const VPIteration &Instance) override;
   };
 
   /// A builder used to construct the current plan.
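
The planner's VPCallbackILV shim gains a second forwarding hook, so VPlan execution can now pull per-lane scalar values, not just per-part vectors, out of the vectorizer's maps. The following self-contained sketch models that delegation pattern; the types and names (ToyILV, demand, the string-keyed cache) are illustrative stand-ins, not LLVM's actual API:

```cpp
#include <iostream>
#include <map>
#include <string>

// Toy stand-ins for llvm::Value and VPIteration.
struct Value { std::string Name; };
struct VPIteration { unsigned Part, Lane; };

// Same shape as VPCallback after this change: per-lane scalar lookups join
// the existing per-part vector lookups in the interface.
struct VPCallback {
  virtual ~VPCallback() {}
  virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
  virtual Value *getOrCreateScalarValue(Value *V, const VPIteration &It) = 0;
};

// Plays the role of InnerLoopVectorizer's value cache.
class ToyILV {
  std::map<std::string, Value> Cache;

public:
  Value *demand(const std::string &Key) {
    Value &V = Cache[Key];
    if (V.Name.empty())
      V.Name = Key; // "create" on first request
    return &V;
  }
};

// Plays the role of VPCallbackILV: a pure forwarding shim, like the one-line
// override added in this patch.
struct ToyCallbackILV : VPCallback {
  ToyILV &ILV;
  explicit ToyCallbackILV(ToyILV &ILV) : ILV(ILV) {}
  Value *getOrCreateVectorValues(Value *V, unsigned Part) override {
    return ILV.demand(V->Name + ".part" + std::to_string(Part));
  }
  Value *getOrCreateScalarValue(Value *V, const VPIteration &It) override {
    return ILV.demand(V->Name + ".part" + std::to_string(It.Part) + ".lane" +
                      std::to_string(It.Lane));
  }
};

int main() {
  ToyILV ILV;
  ToyCallbackILV CB(ILV);
  Value Ptr{"ptr"};
  std::cout << CB.getOrCreateScalarValue(&Ptr, {0, 0})->Name << '\n';
}
```

Keeping the shim stateless means VPlan code never needs to know which values the vectorizer still owns; it just asks through the callback.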
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -484,13 +484,18 @@
 
-  /// Try to vectorize the interleaved access group that \p Instr belongs to,
-  /// optionally masking the vector operations if \p BlockInMask is non-null.
-  void vectorizeInterleaveGroup(Instruction *Instr,
-                                VectorParts *BlockInMask = nullptr);
+  /// Try to vectorize the interleaved access group that \p Instr belongs to,
+  /// with the base address given in \p Addr, optionally masking the vector
+  /// operations if \p BlockInMask is non-null. Use \p State to translate given
+  /// VPValues to IR values in the vectorized loop.
+  void vectorizeInterleaveGroup(Instruction *Instr, VPTransformState &State,
+                                VPValue *Addr, VPValue *BlockInMask = nullptr);
 
-  /// Vectorize Load and Store instructions, optionally masking the vector
-  /// operations if \p BlockInMask is non-null.
-  void vectorizeMemoryInstruction(Instruction *Instr,
-                                  VectorParts *BlockInMask = nullptr);
+  /// Vectorize Load and Store instructions with the base address given in
+  /// \p Addr, optionally masking the vector operations if \p BlockInMask is
+  /// non-null. Use \p State to translate given VPValues to IR values in the
+  /// vectorized loop.
+  void vectorizeMemoryInstruction(Instruction *Instr, VPTransformState &State,
+                                  VPValue *Addr,
+                                  VPValue *BlockInMask = nullptr);
 
   /// Set the debug location in the builder using the debug location in
   /// the instruction.
@@ -2161,7 +2162,9 @@
 //        <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>    ; Interleave R,G,B elements
 //   store <12 x i32> %interleaved.vec              ; Write 4 tuples of R,G,B
 void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr,
-                                                   VectorParts *BlockInMask) {
+                                                   VPTransformState &State,
+                                                   VPValue *Addr,
+                                                   VPValue *BlockInMask) {
   const InterleaveGroup<Instruction> *Group =
       Cost->getInterleavedAccessGroup(Instr);
   assert(Group && "Fail to get an interleaved access group.");
@@ -2171,23 +2174,21 @@
     return;
 
   const DataLayout &DL = Instr->getModule()->getDataLayout();
-  Value *Ptr = getLoadStorePointerOperand(Instr);
 
   // Prepare for the vector type of the interleaved load/store.
   Type *ScalarTy = getMemInstValueType(Instr);
   unsigned InterleaveFactor = Group->getFactor();
   Type *VecTy = VectorType::get(ScalarTy, InterleaveFactor * VF);
-  Type *PtrTy = VecTy->getPointerTo(getLoadStoreAddressSpace(Instr));
 
   // Prepare for the new pointers.
-  setDebugLocFromInst(Builder, Ptr);
   SmallVector<Value *, 2> NewPtrs;
   unsigned Index = Group->getIndex(Instr);
 
-  VectorParts Mask;
+  VectorParts Mask(UF);
   bool IsMaskForCondRequired = BlockInMask;
   if (IsMaskForCondRequired) {
-    Mask = *BlockInMask;
+    for (unsigned Part = 0; Part < UF; ++Part)
+      Mask[Part] = State.get(BlockInMask, Part);
     // TODO: extend the masked interleaved-group support to reversed access.
     assert(!Group->isReverse() && "Reversed masked interleave-group "
                                   "not supported.");
@@ -2202,12 +2203,13 @@
   if (Group->isReverse())
     Index += (VF - 1) * Group->getFactor();
 
-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
-    InBounds = gep->isInBounds();
-
   for (unsigned Part = 0; Part < UF; Part++) {
-    Value *NewPtr = getOrCreateScalarValue(Ptr, {Part, 0});
+    Value *NewPtr = State.get(Addr, {Part, 0});
+    setDebugLocFromInst(Builder, NewPtr);
+
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(NewPtr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
 
     // Notice current instruction could be any index. Need to adjust the address
     // to the member of index 0.
@@ -2225,6 +2227,8 @@
       cast<GetElementPtrInst>(NewPtr)->setIsInBounds(true);
 
     // Cast to the vector pointer type.
+    unsigned AddressSpace = NewPtr->getType()->getPointerAddressSpace();
+    Type *PtrTy = VecTy->getPointerTo(AddressSpace);
     NewPtrs.push_back(Builder.CreateBitCast(NewPtr, PtrTy));
   }
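
The loop above rebases each part's pointer from whatever group member the insert-position instruction happens to access down to member index 0, stepping back further for reversed groups. A minimal sketch of that index arithmetic on plain integers, assuming the usual interleaved layout where member i of tuple t lives at element offset t * Factor + i from the base:

```cpp
#include <cassert>
#include <cstdio>

// Models the GEP index adjustment above on plain element offsets.
unsigned memberZeroOffset(unsigned PtrOffset, unsigned Index, unsigned Factor,
                          unsigned VF, bool Reverse) {
  if (Reverse)
    Index += (VF - 1) * Factor; // jump to the last tuple accessed by the part
  assert(PtrOffset >= Index && "pointer must point at a group member");
  return PtrOffset - Index;     // rebase to the member with index 0
}

int main() {
  // Factor 3 (R,G,B), VF 4: a pointer at the G member (index 1) of the first
  // tuple rebases to offset 0, the start of the 12-element interleaved block.
  std::printf("%u\n", memberZeroOffset(/*PtrOffset=*/1, /*Index=*/1,
                                       /*Factor=*/3, /*VF=*/4, false));
}
```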
 
@@ -2348,7 +2352,9 @@
 }
 
 void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr,
-                                                     VectorParts *BlockInMask) {
+                                                     VPTransformState &State,
+                                                     VPValue *Addr,
+                                                     VPValue *BlockInMask) {
   // Attempt to issue a wide load.
   LoadInst *LI = dyn_cast<LoadInst>(Instr);
   StoreInst *SI = dyn_cast<StoreInst>(Instr);
@@ -2360,17 +2366,15 @@
   assert(Decision != LoopVectorizationCostModel::CM_Unknown &&
          "CM decision should be taken at this point");
   if (Decision == LoopVectorizationCostModel::CM_Interleave)
-    return vectorizeInterleaveGroup(Instr);
+    return vectorizeInterleaveGroup(Instr, State, Addr, BlockInMask);
 
   Type *ScalarDataTy = getMemInstValueType(Instr);
   Type *DataTy = VectorType::get(ScalarDataTy, VF);
-  Value *Ptr = getLoadStorePointerOperand(Instr);
   // An alignment of 0 means target abi alignment. We need to use the scalar's
   // target abi alignment in such a case.
   const DataLayout &DL = Instr->getModule()->getDataLayout();
   const Align Alignment =
       DL.getValueOrABITypeAlignment(getLoadStoreAlignment(Instr), ScalarDataTy);
-  unsigned AddressSpace = getLoadStoreAddressSpace(Instr);
 
   // Determine if the pointer operand of the access is either consecutive or
   // reverse consecutive.
@@ -2385,24 +2389,20 @@
   assert((ConsecutiveStride || CreateGatherScatter) &&
          "The instruction should be scalarized");
 
-  // Handle consecutive loads/stores.
-  if (ConsecutiveStride)
-    Ptr = getOrCreateScalarValue(Ptr, {0, 0});
-
-  VectorParts Mask;
+  VectorParts Mask(UF);
   bool isMaskRequired = BlockInMask;
   if (isMaskRequired)
-    Mask = *BlockInMask;
-
-  bool InBounds = false;
-  if (auto *gep = dyn_cast<GetElementPtrInst>(
-          getLoadStorePointerOperand(Instr)->stripPointerCasts()))
-    InBounds = gep->isInBounds();
+    for (unsigned Part = 0; Part < UF; ++Part)
+      Mask[Part] = State.get(BlockInMask, Part);
 
   const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
     // Calculate the pointer for the specific unroll-part.
     GetElementPtrInst *PartPtr = nullptr;
 
+    bool InBounds = false;
+    if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
+      InBounds = gep->isInBounds();
+
     if (Reverse) {
       // If the address is consecutive but reversed, then the
       // wide store needs to start at the last vector element.
@@ -2420,6 +2420,7 @@
       PartPtr->setIsInBounds(InBounds);
     }
 
+    unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
     return Builder.CreateBitCast(PartPtr, DataTy->getPointerTo(AddressSpace));
   };
 
@@ -2432,7 +2433,7 @@
       Value *StoredVal = getOrCreateVectorValue(SI->getValueOperand(), Part);
       if (CreateGatherScatter) {
         Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-        Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+        Value *VectorGep = State.get(Addr, Part);
         NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep,
                                             Alignment.value(), MaskPart);
       } else {
@@ -2443,7 +2444,7 @@
           // We don't want to update the value in the map as it might be used in
           // another expression. So don't call resetVectorValue(StoredVal).
         }
-        auto *VecPtr = CreateVecPtr(Part, Ptr);
+        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
         if (isMaskRequired)
           NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr,
                                             Alignment.value(), Mask[Part]);
@@ -2463,12 +2464,12 @@
     Value *NewLI;
     if (CreateGatherScatter) {
       Value *MaskPart = isMaskRequired ? Mask[Part] : nullptr;
-      Value *VectorGep = getOrCreateVectorValue(Ptr, Part);
+      Value *VectorGep = State.get(Addr, Part);
       NewLI = Builder.CreateMaskedGather(VectorGep, Alignment.value(), MaskPart,
                                          nullptr, "wide.masked.gather");
       addMetadata(NewLI, LI);
     } else {
-      auto *VecPtr = CreateVecPtr(Part, Ptr);
+      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
       if (isMaskRequired)
         NewLI = Builder.CreateMaskedLoad(VecPtr, Alignment.value(), Mask[Part],
                                          UndefValue::get(DataTy),
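
Note the two address forms in play above: gathers and scatters consume the per-part vector of pointers via State.get(Addr, Part), while consecutive wide loads and stores start from the scalar lane-zero pointer, State.get(Addr, {0, 0}), and let CreateVecPtr offset it by Part * VF. A toy illustration of the distinction, with string names standing in for IR values:

```cpp
#include <iostream>
#include <string>

// ToyState mimics the two State.get forms used above.
struct ToyState {
  std::string get(const std::string &Addr, unsigned Part) {
    return Addr + ".vecptrs.part" + std::to_string(Part); // vector of pointers
  }
  std::string getLane0(const std::string &Addr) {
    return Addr + ".lane0"; // scalar pointer, i.e. State.get(Addr, {0, 0})
  }
};

int main() {
  ToyState State;
  const std::string Addr = "%gep";
  const unsigned UF = 2, VF = 4;
  for (unsigned Part = 0; Part < UF; ++Part) {
    // Gather/scatter path: one vector of pointers per unroll part.
    std::cout << "gather:  " << State.get(Addr, Part) << '\n';
    // Consecutive path: lane-0 pointer plus CreateVecPtr's Part * VF offset.
    std::cout << "wide-ld: " << State.getLane0(Addr) << " + " << Part * VF
              << '\n';
  }
}
```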
@@ -6759,7 +6760,11 @@
   if (Legal->isMaskRequired(I))
     Mask = createBlockInMask(I->getParent(), Plan);
 
-  return new VPWidenMemoryInstructionRecipe(*I, Mask);
+  Value *Addr = getLoadStorePointerOperand(I);
+  assert(Addr && "Expected a load/store at this point");
+
+  return new VPWidenMemoryInstructionRecipe(*I, Plan->getOrAddVPValue(Addr),
+                                            Mask);
 }
 
 VPWidenIntOrFpInductionRecipe *
@@ -7208,7 +7213,8 @@
   for (auto IG : InterleaveGroups) {
     auto *Recipe = cast<VPWidenMemoryInstructionRecipe>(
         RecipeBuilder.getRecipe(IG->getInsertPos()));
-    (new VPInterleaveRecipe(IG, Recipe->getMask()))->insertBefore(Recipe);
+    (new VPInterleaveRecipe(IG, Recipe->getAddr(), Recipe->getMask()))
+        ->insertBefore(Recipe);
 
     for (unsigned i = 0; i < IG->getFactor(); ++i)
       if (Instruction *Member = IG->getMember(i)) {
@@ -7282,13 +7288,21 @@
       return ILV.getOrCreateVectorValue(V, Part);
 }
 
+Value *LoopVectorizationPlanner::VPCallbackILV::getOrCreateScalarValue(
+    Value *V, const VPIteration &Instance) {
+  return ILV.getOrCreateScalarValue(V, Instance);
+}
+
 void VPInterleaveRecipe::print(raw_ostream &O, const Twine &Indent) const {
   O << " +\n"
     << Indent << "\"INTERLEAVE-GROUP with factor " << IG->getFactor() << " at ";
   IG->getInsertPos()->printAsOperand(O, false);
-  if (User) {
+  O << ", ";
+  getAddr()->printAsOperand(O);
+  VPValue *Mask = getMask();
+  if (Mask) {
     O << ", ";
-    User->getOperand(0)->printAsOperand(O);
+    Mask->printAsOperand(O);
   }
   O << "\\l\"";
   for (unsigned i = 0; i < IG->getFactor(); ++i)
@@ -7357,15 +7371,8 @@
 
 void VPInterleaveRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Interleave group being replicated.");
-  if (!User)
-    return State.ILV->vectorizeInterleaveGroup(IG->getInsertPos());
-
-  // Last (and currently only) operand is a mask.
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  VPValue *Mask = User->getOperand(User->getNumOperands() - 1);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), &MaskValues);
+  State.ILV->vectorizeInterleaveGroup(IG->getInsertPos(), State, getAddr(),
+                                      getMask());
 }
 
 void VPReplicateRecipe::execute(VPTransformState &State) {
@@ -7452,14 +7459,7 @@
 }
 
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
-  VPValue *Mask = getMask();
-  if (!Mask)
-    return State.ILV->vectorizeMemoryInstruction(&Instr);
-
-  InnerLoopVectorizer::VectorParts MaskValues(State.UF);
-  for (unsigned Part = 0; Part < State.UF; ++Part)
-    MaskValues[Part] = State.get(Mask, Part);
-  State.ILV->vectorizeMemoryInstruction(&Instr, &MaskValues);
+  State.ILV->vectorizeMemoryInstruction(&Instr, State, getAddr(), getMask());
 }
 
 static ScalarEpilogueLowering
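
With both execute() methods reduced to one-line delegations, the pre-expansion of masks into VectorParts arrays moves out of every caller and into the callees, which now query VPTransformState once per unroll part. A sketch of the new calling convention, with toy types in place of VPValue and the transform state:

```cpp
#include <iostream>
#include <string>

// Toy state: resolves a symbolic VPValue name to its per-part IR value, the
// way State.get(Mask, Part) does above.
struct ToyState {
  unsigned UF = 2; // unroll factor
  std::string get(const std::string &Def, unsigned Part) {
    return Def + ".part" + std::to_string(Part);
  }
};

// New shape of the callee: it receives the VPValues themselves and pulls each
// part from the state on demand, instead of being handed a pre-built
// VectorParts array by every caller.
void vectorizeMemoryInstruction(ToyState &State, const std::string &Addr,
                                const std::string *Mask) {
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    std::string MaskPart = Mask ? State.get(*Mask, Part) : "<all-true>";
    std::cout << "part " << Part << ": addr=" << State.get(Addr, Part)
              << " mask=" << MaskPart << '\n';
  }
}

int main() {
  ToyState State;
  std::string Addr = "%gep", Mask = "%block.mask";
  vectorizeMemoryInstruction(State, Addr, &Mask);   // masked access
  vectorizeMemoryInstruction(State, Addr, nullptr); // unmasked access
}
```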
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -227,6 +227,8 @@
 struct VPCallback {
   virtual ~VPCallback() {}
   virtual Value *getOrCreateVectorValues(Value *V, unsigned Part) = 0;
+  virtual Value *getOrCreateScalarValue(Value *V,
+                                        const VPIteration &Instance) = 0;
 };
 
 /// VPTransformState holds information passed down when "executing" a VPlan,
@@ -269,6 +271,15 @@
     return Callback.getOrCreateVectorValues(VPValue2Value[Def], Part);
   }
 
+  /// Get the generated Value for a given VPValue and given Part and Lane.
+  /// Note that as per-lane scalar Defs are still created by ILV and managed
+  /// in its ValueMap, this method currently always delegates the call to ILV
+  /// through the callback, in order to provide callers a consistent API.
+  Value *get(VPValue *Def, const VPIteration &Instance) {
+    // Scalar Defs are managed by ILV: fetch the Value from its ValueMap.
+    return Callback.getOrCreateScalarValue(VPValue2Value[Def], Instance);
+  }
+
   /// Set the generated Value for a given VPValue and a given Part.
   void set(VPValue *Def, Value *V, unsigned Part) {
     if (!Data.PerPartOutput.count(Def)) {
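
The new get() overload complements the existing per-part lookup: values the plan generated itself come from Data.PerPartOutput, while per-lane scalars are always delegated through the callback because ILV still owns every scalar map entry. A compact model of the two paths, with illustrative types rather than the real VPTransformState:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <vector>

struct VPIteration { unsigned Part, Lane; };

struct ToyState {
  // Per-part values the plan generated itself (Data.PerPartOutput's role).
  std::map<std::string, std::vector<std::string>> PerPartOutput;

  // get(Def, Part): prefer plan-generated values; otherwise delegate (here we
  // synthesize a name where the real code calls the ILV callback).
  std::string get(const std::string &Def, unsigned Part) {
    auto It = PerPartOutput.find(Def);
    if (It != PerPartOutput.end())
      return It->second.at(Part);
    return Def + ".v" + std::to_string(Part);
  }

  // The overload this patch adds: per-lane scalars are always delegated,
  // since ILV still owns every scalar ValueMap entry.
  std::string get(const std::string &Def, const VPIteration &I) {
    return Def + ".s" + std::to_string(I.Part) + "." + std::to_string(I.Lane);
  }
};

int main() {
  ToyState State;
  State.PerPartOutput["%mask"] = {"%mask.0", "%mask.1"};
  std::cout << State.get("%mask", 1u) << '\n';     // from the plan: %mask.1
  std::cout << State.get("%addr", {0, 0}) << '\n'; // delegated: %addr.s0.0
}
```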
@@ -862,13 +873,14 @@
 class VPInterleaveRecipe : public VPRecipeBase {
 private:
   const InterleaveGroup<Instruction> *IG;
-  std::unique_ptr<VPUser> User;
+  VPUser User;
 
 public:
-  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Mask)
-      : VPRecipeBase(VPInterleaveSC), IG(IG) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
+                     VPValue *Mask)
+      : VPRecipeBase(VPInterleaveSC), IG(IG), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
   }
   ~VPInterleaveRecipe() override = default;
 
@@ -877,6 +889,19 @@
     return V->getVPRecipeID() == VPRecipeBase::VPInterleaveSC;
   }
 
+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    // Address is the 1st operand.
+    return User.getOperand(0);
+  }
+
+  /// Return the mask used by this recipe. Note that a full mask is represented
+  /// by a nullptr.
+  VPValue *getMask() const {
+    // Mask is the 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
+  }
+
   /// Generate the wide load or store, and shuffles.
   void execute(VPTransformState &State) override;
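
Replacing the optional heap-allocated VPUser with an embedded one fixes the operand layout for both recipes: operand 0 is always the address, operand 1 is the mask when one exists, and a full mask is simply not stored. A toy version of that convention and its accessors:

```cpp
#include <cassert>
#include <vector>

struct VPValue {};

// Toy recipe mirroring the shared convention: operand 0 is the address,
// operand 1 is the optional mask, and "no mask" means all lanes are active.
struct ToyRecipe {
  std::vector<VPValue *> Operands;
  ToyRecipe(VPValue *Addr, VPValue *Mask) : Operands{Addr} {
    if (Mask)
      Operands.push_back(Mask);
  }
  VPValue *getAddr() const { return Operands[0]; } // 1st operand
  VPValue *getMask() const {                       // 2nd operand, if present
    return Operands.size() == 2 ? Operands[1] : nullptr;
  }
};

int main() {
  VPValue Addr, Mask;
  assert(ToyRecipe(&Addr, nullptr).getMask() == nullptr);
  assert(ToyRecipe(&Addr, &Mask).getMask() == &Mask);
  assert(ToyRecipe(&Addr, &Mask).getAddr() == &Addr);
}
```

Pinning the address at operand 0, rather than the old "mask is the last operand" rule, keeps the accessors constant-time and unambiguous, and leaves room for further operands to be appended later.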
 
@@ -999,13 +1024,14 @@
 class VPWidenMemoryInstructionRecipe : public VPRecipeBase {
 private:
   Instruction &Instr;
-  std::unique_ptr<VPUser> User;
+  VPUser User;
 
 public:
-  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Mask)
-      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr) {
-    if (Mask) // Create a VPInstruction to register as a user of the mask.
-      User.reset(new VPUser({Mask}));
+  VPWidenMemoryInstructionRecipe(Instruction &Instr, VPValue *Addr,
+                                 VPValue *Mask)
+      : VPRecipeBase(VPWidenMemoryInstructionSC), Instr(Instr), User({Addr}) {
+    if (Mask)
+      User.addOperand(Mask);
   }
 
   /// Method to support type inquiry through isa, cast, and dyn_cast.
@@ -1013,11 +1039,17 @@
     return V->getVPRecipeID() == VPRecipeBase::VPWidenMemoryInstructionSC;
   }
 
+  /// Return the address accessed by this recipe.
+  VPValue *getAddr() const {
+    // Address is the 1st operand.
+    return User.getOperand(0);
+  }
+
   /// Return the mask used by this recipe. Note that a full mask is represented
   /// by a nullptr.
-  VPValue *getMask() {
-    // Mask is the last operand.
-    return User ? User->getOperand(User->getNumOperands() - 1) : nullptr;
+  VPValue *getMask() const {
+    // Mask is the 2nd operand.
+    return User.getNumOperands() == 2 ? User.getOperand(1) : nullptr;
   }
 
   /// Generate the wide load/store.
@@ -1414,6 +1446,13 @@
     return Value2VPValue[V];
   }
 
+  VPValue *getOrAddVPValue(Value *V) {
+    assert(V && "Trying to get or add the VPValue of a null Value");
+    if (!Value2VPValue.count(V))
+      addVPValue(V);
+    return getVPValue(V);
+  }
+
   /// Return the VPLoopInfo analysis for this VPlan.
   VPLoopInfo &getVPLoopInfo() { return VPLInfo; }
   const VPLoopInfo &getVPLoopInfo() const { return VPLInfo; }
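
getOrAddVPValue is a classic get-or-create memoization over Value2VPValue: the first query wraps the IR value, later queries return the same wrapper, so distinct recipes reading the same pointer share a single VPValue operand. A minimal sketch of the idiom, with toy Value and plan types and a single map lookup where the code above does count() followed by getVPValue():

```cpp
#include <cassert>
#include <map>
#include <memory>

struct Value {};
struct VPValue { Value *Underlying; };

class ToyPlan {
  std::map<Value *, std::unique_ptr<VPValue>> Value2VPValue;

public:
  VPValue *getOrAddVPValue(Value *V) {
    assert(V && "Trying to get or add the VPValue of a null Value");
    std::unique_ptr<VPValue> &Slot = Value2VPValue[V];
    if (!Slot)
      Slot.reset(new VPValue{V}); // create the wrapper on first request
    return Slot.get();
  }
};

int main() {
  ToyPlan Plan;
  Value V;
  // Repeated queries yield the same wrapper, so two recipes that read the
  // same pointer end up sharing one VPValue operand.
  assert(Plan.getOrAddVPValue(&V) == Plan.getOrAddVPValue(&V));
}
```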
Index: llvm/lib/Transforms/Vectorize/VPlan.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -735,9 +735,12 @@
 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O,
                                            const Twine &Indent) const {
   O << " +\n" << Indent << "\"WIDEN " << VPlanIngredient(&Instr);
-  if (User) {
+  O << ", ";
+  getAddr()->printAsOperand(O);
+  VPValue *Mask = getMask();
+  if (Mask) {
     O << ", ";
-    User->getOperand(0)->printAsOperand(O);
+    Mask->printAsOperand(O);
   }
   O << "\\l\"";
 }
Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -56,7 +56,9 @@
       VPRecipeBase *NewRecipe = nullptr;
       // Create VPWidenMemoryInstructionRecipe for loads and stores.
       if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
-        NewRecipe = new VPWidenMemoryInstructionRecipe(*Inst, nullptr /*Mask*/);
+        NewRecipe = new VPWidenMemoryInstructionRecipe(
+            *Inst, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
+            nullptr /*Mask*/);
       else if (PHINode *Phi = dyn_cast<PHINode>(Inst)) {
         InductionDescriptor II = Inductions->lookup(Phi);
         if (II.getKind() == InductionDescriptor::IK_IntInduction ||