Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3308,8 +3308,8 @@ SCEVExpander Exp(*SE, DL, "induction"); auto Step = ID.getStep(); auto StartValue = ID.getStartValue(); - assert(Index->getType() == Step->getType() && - "Index type does not match StepValue type"); + assert(Index->getType()->getScalarType() == Step->getType() && + "Index scalar type does not match StepValue type"); // Note: the IR at this point is broken. We cannot use SE to create any new // SCEV and then expand it, hoping that SCEV's simplification will give us @@ -3328,14 +3328,20 @@ return B.CreateAdd(X, Y); }; + // We allow X to be a vector type, in which case Y will potentially be + // splatted into a vector with the same element count. auto CreateMul = [&B](Value *X, Value *Y) { - assert(X->getType() == Y->getType() && "Types don't match!"); + assert(X->getType()->getScalarType() == Y->getType() && + "Types don't match!"); if (auto *CX = dyn_cast<ConstantInt>(X)) if (CX->isOne()) return Y; if (auto *CY = dyn_cast<ConstantInt>(Y)) if (CY->isOne()) return X; + VectorType *XVTy = dyn_cast<VectorType>(X->getType()); + if (XVTy && !isa<VectorType>(Y->getType())) + Y = B.CreateVectorSplat(XVTy->getElementCount(), Y); return B.CreateMul(X, Y); }; @@ -3354,6 +3360,7 @@ switch (ID.getKind()) { case InductionDescriptor::IK_IntInduction: { + assert(!isa<VectorType>(Index->getType())); assert(Index->getType() == StartValue->getType() && "Index type does not match StartValue type"); if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne()) @@ -3368,9 +3375,10 @@ return B.CreateGEP( StartValue->getType()->getPointerElementType(), StartValue, CreateMul(Index, - Exp.expandCodeFor(Step, Index->getType(), GetInsertPoint()))); + Exp.expandCodeFor(Step, Index->getType()->getScalarType(), GetInsertPoint()))); } case InductionDescriptor::IK_FpInduction: { + 
assert(!isa<VectorType>(Index->getType())); assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); auto InductionBinOp = ID.getInductionBinOp(); assert(InductionBinOp && @@ -4769,13 +4777,34 @@ // iteration. If the instruction is uniform, we only need to generate the // first lane. Otherwise, we generate all VF values. bool IsUniform = Cost->isUniformAfterVectorization(P, State.VF); - assert((IsUniform || !VF.isScalable()) && - "Currently unsupported for scalable vectors"); - unsigned Lanes = IsUniform ? 1 : State.VF.getFixedValue(); + unsigned Lanes = IsUniform ? 1 : State.VF.getKnownMinValue(); + + Value *UnitStepVec = nullptr, *PtrIndSplat = nullptr; + if (!IsUniform && VF.isScalable()) { + Type *VecIVTy = VectorType::get(PtrInd->getType(), VF); + UnitStepVec = Builder.CreateStepVector(VecIVTy); + PtrIndSplat = Builder.CreateVectorSplat(VF, PtrInd); + } for (unsigned Part = 0; Part < UF; ++Part) { Value *PartStart = createStepForVF( Builder, ConstantInt::get(PtrInd->getType(), Part), VF); + + if (!IsUniform && VF.isScalable()) { + Value *PartStartSplat = Builder.CreateVectorSplat(VF, PartStart); + Value *Indices = Builder.CreateAdd(PartStartSplat, UnitStepVec); + Value *GlobalIndices = Builder.CreateAdd(PtrIndSplat, Indices); + Value *SclrGep = + emitTransformedIndex(Builder, GlobalIndices, PSE.getSE(), DL, II); + SclrGep->setName("next.gep"); + State.set(PhiR, SclrGep, Part); + // We've cached the whole vector, which means we can support the + // extraction of any lane. In addition, we can also cache the first + // known minimum number of lanes as a further optimization, since + // we frequently want to extract the first lane anyway. This avoids + // an additional extractelement operation to get the first lane. 
+ } + for (unsigned Lane = 0; Lane < Lanes; ++Lane) { Value *Idx = Builder.CreateAdd( PartStart, ConstantInt::get(PtrInd->getType(), Lane)); Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll =================================================================== --- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll +++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll @@ -114,9 +114,57 @@ ret void } + +; +; Check multiple pointer induction variables where only one is recognized as +; uniform and remains uniform after vectorization. The other pointer induction +; variable is not recognized as uniform and is not uniform after vectorization +; because it is stored to memory. +; + +define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) { +; CHECK-LABEL: @pointer_iv_mixed( +; CHECK: vector.body +; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ] +; CHECK: %[[STEPVEC:.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: %[[TMP1:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[IDX]], i32 0 +; CHECK-NEXT: %[[TMP2:.*]] = shufflevector <vscale x 2 x i64> %[[TMP1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer +; CHECK-NEXT: %[[VECIND:.*]] = add <vscale x 2 x i64> %[[TMP2]], %[[STEPVEC]] +; CHECK-NEXT: %[[APTRS:.*]] = getelementptr i32, i32* %a, <vscale x 2 x i64> %[[VECIND]] +; CHECK-NEXT: %{{.*}} = getelementptr i32, i32* %a, i64 %[[IDX]] +; CHECK-NEXT: %[[GEPB:.*]] = getelementptr i32*, i32** %b, i64 %[[IDX]] +; CHECK: %[[BPTR:.*]] = bitcast i32** %[[GEPB]] to <vscale x 2 x i32*>* +; CHECK-NEXT: store <vscale x 2 x i32*> %[[APTRS]], <vscale x 2 x i32*>* %[[BPTR]], align 8 + +entry: + br label %for.body + +for.body: + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] + %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ] + %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ] + %tmp0 = phi i32 [ %tmp2, %for.body ], [ 0, %entry ] + %tmp1 = load i32, i32* %p, align 8 + %tmp2 = add i32 %tmp1, %tmp0 + store i32* %p, i32** %q, align 8 + %tmp3 = getelementptr inbounds i32, i32* %p, i32 1 + %tmp4 = getelementptr inbounds i32*, i32** %q, i32 1 
+ %i.next = add nuw nsw i64 %i, 1 + %cond = icmp slt i64 %i.next, %n + br i1 %cond, label %for.body, label %for.end, !llvm.loop !6 + +for.end: + %tmp5 = phi i32 [ %tmp2, %for.body ] + ret i32 %tmp5 +} + + !0 = distinct !{!0, !1, !2, !3, !4, !5} !1 = !{!"llvm.loop.mustprogress"} !2 = !{!"llvm.loop.vectorize.width", i32 4} !3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} !4 = !{!"llvm.loop.vectorize.enable", i1 true} !5 = !{!"llvm.loop.interleave.count", i32 2} +!6 = distinct !{!6, !1, !7, !3, !4, !8} +!7 = !{!"llvm.loop.vectorize.width", i32 2} +!8 = !{!"llvm.loop.interleave.count", i32 1}