Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1425,9 +1425,17 @@
   /// Returns true if the value V is uniform within the loop.
   bool isUniform(Value *V);
 
-  /// Returns true if this instruction will remain scalar after vectorization.
+  /// Returns true if \p I is known to be uniform after vectorization.
   bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
 
+  /// Returns true if \p I is known to be scalar after vectorization.
+  bool isScalarAfterVectorization(Instruction *I) {
+    if (Uniforms.count(I) || !TheLoop->contains(I))
+      return true;
+    auto *GEP = dyn_cast<GetElementPtrInst>(I);
+    return GEP && !VectorGEPs.count(GEP);
+  }
+
   /// Returns the information that we collected about runtime memory check.
   const RuntimePointerChecking *getRuntimePointerChecking() const {
     return LAI->getRuntimePointerChecking();
@@ -1480,6 +1488,17 @@
   bool isLegalMaskedGather(Type *DataType) {
     return TTI->isLegalMaskedGather(DataType);
   }
+  /// Returns true if the target machine can represent \p V as a masked gather
+  /// or scatter operation.
+  bool isLegalGatherOrScatter(Value *V) {
+    auto *LI = dyn_cast<LoadInst>(V);
+    auto *SI = dyn_cast<StoreInst>(V);
+    if (!LI && !SI)
+      return false;
+    auto *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+    auto *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+    return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty));
+  }
 
   /// Returns true if vector representation of the instruction \p I
   /// requires mask.
@@ -1504,9 +1523,26 @@
   /// transformation.
   bool canVectorizeWithIfConvert();
 
-  /// Collect the variables that need to stay uniform after vectorization.
+  /// Collect the instructions that are uniform after vectorization. An
+  /// instruction is uniform if we represent it with a single scalar value in
+  /// the vectorized loop corresponding to each vector iteration. Examples of
+  /// uniform instructions include scalar induction variables and pointer
+  /// operands of consecutive or interleaved memory accesses. Note that
+  /// although uniformity implies an instruction will be scalar, the reverse is
+  /// not true. In general, a scalarized instruction will be represented by VF
+  /// scalar values in the vectorized loop, each corresponding to an iteration
+  /// of the original scalar loop.
   void collectLoopUniforms();
 
+  /// Collect the getelementptr instructions that will be vectorized. A
+  /// getelementptr instruction is only vectorized if it is used for a legal
+  /// gather or scatter operation.
+  void collectVectorGEPs();
+
+  /// Returns the uniform pointer for \p I if \p I is uniform or has a uniform
+  /// pointer operand.
+  Instruction *getUniformPtr(Instruction *I);
+
   /// Return true if all of the instructions in the block can be speculatively
   /// executed. \p SafePtrs is a list of addresses that are known to be legal
   /// and we know that we can read from them without segfault.
@@ -1587,6 +1623,9 @@
   /// vectorization.
   SmallPtrSet<Instruction *, 4> Uniforms;
 
+  /// Holds the getelementptr instructions that will be vectorized.
+  SmallPtrSet<GetElementPtrInst *, 4> VectorGEPs;
+
   /// Can we assume the absence of NaNs.
   bool HasFunNoNaNAttr;
 
@@ -4539,9 +4578,6 @@
     return false;
   }
 
-  // Collect all of the variables that remain uniform after vectorization.
-  collectLoopUniforms();
-
   DEBUG(dbgs() << "LV: We can vectorize this loop"
                << (LAI->getRuntimePointerChecking()->Need
                        ? " (with a runtime bound check)"
@@ -4558,6 +4594,12 @@
   if (UseInterleaved)
     InterleaveInfo.analyzeInterleaving(*getSymbolicStrides());
 
+  // Collect all getelementptr instructions that will be vectorized.
+  collectVectorGEPs();
+
+  // Collect all instructions that are known to be uniform after vectorization.
+  collectLoopUniforms();
+
   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
@@ -4823,6 +4865,45 @@
   return true;
 }
 
+/// \brief Return the pointer operand of a load or store instruction.
+static Value *getPointerOperand(Value *I) {
+  if (auto *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperand();
+  if (auto *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperand();
+  return nullptr;
+}
+
+Instruction *LoopVectorizationLegality::getUniformPtr(Instruction *I) {
+
+  // If the instruction is a consecutive pointer it will be uniform after
+  // vectorization. In this case, return the instruction.
+  if (isa<PointerType>(I->getType()) && isConsecutivePtr(I))
+    return I;
+
+  // If the instruction is an interleaved access, the pointer operand will be
+  // treated as if it were a consecutive pointer.
+  if (isAccessInterleaved(I))
+    return cast<Instruction>(getPointerOperand(I));
+
+  // Otherwise, assume the instruction has no uniform pointer.
+  return nullptr;
+}
+
+void LoopVectorizationLegality::collectVectorGEPs() {
+  for (auto *BB : TheLoop->blocks())
+    for (auto &I : *BB) {
+      auto *Ptr = getPointerOperand(&I);
+      if (!Ptr)
+        continue;
+      auto *GEP = getGEPInstruction(Ptr);
+      if (GEP && isLegalGatherOrScatter(&I)) {
+        VectorGEPs.insert(GEP);
+        DEBUG(dbgs() << "LV: GEP will be vectorized: " << I << "\n");
+      }
+    }
+}
+
 void LoopVectorizationLegality::collectLoopUniforms() {
   // We now know that the loop is vectorizable!
   // Collect variables that will remain uniform after vectorization.
@@ -4843,16 +4924,14 @@
     DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
   }
 
-  // Also add all consecutive pointer values; these values will be uniform
-  // after vectorization (and subsequent cleanup).
-  for (auto *BB : TheLoop->blocks()) {
+  // Also add all uniform pointer values.
+  for (auto *BB : TheLoop->blocks())
     for (auto &I : *BB) {
-      if (I.getType()->isPointerTy() && isConsecutivePtr(&I)) {
-        Worklist.insert(&I);
-        DEBUG(dbgs() << "LV: Found uniform instruction: " << I << "\n");
+      if (auto *Ptr = getUniformPtr(&I)) {
+        Worklist.insert(Ptr);
+        DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ptr << "\n");
       }
     }
-  }
 
   // Expand Worklist in topological order: whenever a new instruction
   // is added , its users should be either already inside Worklist, or
@@ -4875,6 +4954,8 @@
     }
   } while (idx != Worklist.size());
 
+  Uniforms.insert(Worklist.begin(), Worklist.end());
+
   // For an instruction to be added into Worklist above, all its users inside
   // the current loop should be already added into Worklist. This condition
   // cannot be true for phi instructions which is always in a dependence loop.
@@ -4887,21 +4968,19 @@
     auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
     if (all_of(PN->users(),
                [&](User *U) -> bool {
-                 return U == UpdateV || isOutOfScope(U) ||
-                        Worklist.count(cast<Instruction>(U));
+                 auto *I = dyn_cast<Instruction>(U);
+                 return I && (I == UpdateV || isScalarAfterVectorization(I));
                }) &&
         all_of(UpdateV->users(), [&](User *U) -> bool {
-          return U == PN || isOutOfScope(U) ||
-                 Worklist.count(cast<Instruction>(U));
+          auto *I = dyn_cast<Instruction>(U);
+          return I && (I == PN || isScalarAfterVectorization(I));
         })) {
-      Worklist.insert(cast<Instruction>(PN));
-      Worklist.insert(cast<Instruction>(UpdateV));
+      Uniforms.insert(cast<Instruction>(PN));
+      Uniforms.insert(cast<Instruction>(UpdateV));
       DEBUG(dbgs() << "LV: Found uniform instruction: " << *PN << "\n");
       DEBUG(dbgs() << "LV: Found uniform instruction: " << *UpdateV << "\n");
     }
   }
-
-  Uniforms.insert(Worklist.begin(), Worklist.end());
 }
 
 bool LoopVectorizationLegality::canVectorizeMemory() {
@@ -5821,19 +5900,6 @@
   return Cost;
 }
 
-/// \brief Check if the load/store instruction \p I may be translated into
-/// gather/scatter during vectorization.
-///
-/// Pointer \p Ptr specifies address in memory for the given scalar memory
-/// instruction. We need it to retrieve data type.
-/// Using gather/scatter is possible when it is supported by target.
-static bool isGatherOrScatterLegal(Instruction *I, Value *Ptr,
-                                   LoopVectorizationLegality *Legal) {
-  auto *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
-  return (isa<LoadInst>(I) && Legal->isLegalMaskedGather(DataTy)) ||
-         (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
-}
-
 /// \brief Check whether the address computation for a non-consecutive memory
 /// access looks like an unlikely candidate for being merged into the indexing
 /// mode.
@@ -6081,7 +6147,7 @@
     // Scalarized loads/stores.
     int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
     bool UseGatherOrScatter =
-        (ConsecutiveStride == 0) && isGatherOrScatterLegal(I, Ptr, Legal);
+        (ConsecutiveStride == 0) && Legal->isLegalGatherOrScatter(I);
 
     bool Reverse = ConsecutiveStride < 0;
     const DataLayout &DL = I->getModule()->getDataLayout();
@@ -6244,16 +6310,6 @@
   return false;
 }
 
-/// Take the pointer operand from the Load/Store instruction.
-/// Returns NULL if this is not a valid Load/Store instruction.
-static Value *getPointerOperand(Value *I) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->getPointerOperand();
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getPointerOperand();
-  return nullptr;
-}
-
 void LoopVectorizationCostModel::collectValuesToIgnore() {
   // Ignore ephemeral values.
   CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
@@ -6267,43 +6323,10 @@
   }
 
   // Insert uniform instruction into VecValuesToIgnore.
-  // Collect non-gather/scatter and non-consecutive ptr in NonConsecutivePtr.
-  SmallPtrSet<Instruction *, 8> NonConsecutivePtr;
-  for (auto *BB : TheLoop->getBlocks()) {
-    for (auto &I : *BB) {
+  for (auto *BB : TheLoop->getBlocks())
+    for (auto &I : *BB)
       if (Legal->isUniformAfterVectorization(&I))
         VecValuesToIgnore.insert(&I);
-      Instruction *PI = dyn_cast_or_null<Instruction>(getPointerOperand(&I));
-      if (PI && !Legal->isConsecutivePtr(PI) &&
-          !isGatherOrScatterLegal(&I, PI, Legal))
-        NonConsecutivePtr.insert(PI);
-    }
-  }
-
-  // Ignore induction phis that are either used in uniform instructions or
-  // NonConsecutivePtr.
-  for (auto &Induction : *Legal->getInductionVars()) {
-    auto *PN = Induction.first;
-    auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
-
-    if (std::all_of(PN->user_begin(), PN->user_end(),
-                    [&](User *U) -> bool {
-                      Instruction *UI = dyn_cast<Instruction>(U);
-                      return U == UpdateV || !TheLoop->contains(UI) ||
-                             Legal->isUniformAfterVectorization(UI) ||
-                             NonConsecutivePtr.count(UI);
-                    }) &&
-        std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
-                    [&](User *U) -> bool {
-                      Instruction *UI = dyn_cast<Instruction>(U);
-                      return U == PN || !TheLoop->contains(UI) ||
-                             Legal->isUniformAfterVectorization(UI) ||
-                             NonConsecutivePtr.count(UI);
-                    })) {
-      VecValuesToIgnore.insert(PN);
-      VecValuesToIgnore.insert(UpdateV);
-    }
-  }
 }
 
 void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
Index: test/Transforms/LoopVectorize/induction.ll
===================================================================
--- test/Transforms/LoopVectorize/induction.ll
+++ test/Transforms/LoopVectorize/induction.ll
@@ -248,6 +248,51 @@
   ret void
 }
 
+; Make sure we scalarize the step vectors used for the pointer arithmetic. We
+; can't easily simplify vectorized step vectors. (Interleaved accesses.)
+;
+; for (int i = 0; i < n; ++i)
+;   p[i].f = a[i * 4]
+;
+; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
+; INTERLEAVE: vector.body:
+; INTERLEAVE:   %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; INTERLEAVE:   %[[i1:.+]] = or i64 %[[i0]], 1
+; INTERLEAVE:   %[[i2:.+]] = or i64 %[[i0]], 2
+; INTERLEAVE:   %[[i3:.+]] = or i64 %[[i0]], 3
+; INTERLEAVE:   %[[i4:.+]] = or i64 %[[i0]], 4
+; INTERLEAVE:   %[[i5:.+]] = or i64 %[[i0]], 5
+; INTERLEAVE:   %[[i6:.+]] = or i64 %[[i0]], 6
+; INTERLEAVE:   %[[i7:.+]] = or i64 %[[i0]], 7
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i0]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i1]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i2]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i3]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i4]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i5]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i6]], i32 1
+; INTERLEAVE:   getelementptr inbounds %pair, %pair* %p, i64 %[[i7]], i32 1
+
+define void @scalarize_induction_variable_04(i32* %a, %pair* %p, i32 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
+  %0 = shl nsw i64 %i, 2
+  %1 = getelementptr inbounds i32, i32* %a, i64 %0
+  %2 = load i32, i32* %1, align 1
+  %3 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+  store i32 %2, i32* %3, align 1
+  %i.next = add nuw nsw i64 %i, 1
+  %4 = trunc i64 %i.next to i32
+  %cond = icmp eq i32 %4, %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
 ; Make sure that the loop exit count computation does not overflow for i8 and
 ; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
 ; induction variable to a bigger type the exit count computation will overflow