Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -16260,6 +16260,34 @@
 
 The argument to this intrinsic must be a vector.
 
+
+'``llvm.experimental.stepvector``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic. You can use ``llvm.experimental.stepvector``
+to generate a vector whose lane values comprise the linear sequence
+<0, 1, 2, ...>. It is primarily intended for scalable vectors.
+
+::
+
+      declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+      declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+
+The '``llvm.experimental.stepvector``' intrinsics are used to create vectors
+of integers whose elements contain a linear sequence of values starting from 0
+with a step of 1. The result must be a vector with integer elements. It is
+recommended that this experimental intrinsic only be used for scalable vectors.
+The intrinsic also works for fixed width vectors, but the user will probably
+get better code quality generating constant vectors instead. If the sequence
+value exceeds the limit of the element type, that lane's result is undefined.
+
+Arguments:
+""""""""""
+
+None.
+
 Matrix Intrinsics
 -----------------
 
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1249,6 +1249,14 @@
       return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                              VarMask, Alignment, CostKind, I);
     }
+    case Intrinsic::experimental_stepvector: {
+      assert(RetTy->isVectorTy() && RetTy->getScalarType()->isIntegerTy() &&
+             "Stepvector can only be used for integer vectors");
+      if (isa<ScalableVectorType>(RetTy))
+        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+      // Fixed-width: the cost of materialising a constant integer vector.
+      return TargetTransformInfo::TCC_Basic;
+    }
     case Intrinsic::experimental_vector_extract: {
       // FIXME: Handle case where a scalable vector is extracted from a scalable
       // vector
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -569,6 +569,14 @@
   /// implicitly truncated to it.
   SPLAT_VECTOR,
 
+  /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
+  /// of a linear sequence of unsigned values starting from 0 with a step of
+  /// IMM, where IMM must be a constant positive integer value. The operation
+  /// does not support returning fixed-width vectors or non-constant operands.
+  /// If the sequence value exceeds the limit allowed for the element type then
+  /// the values for those lanes are undefined.
+  STEP_VECTOR,
+
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
   /// producing an unsigned/signed value of type i[2*N], then return the top
   /// part.
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -654,6 +654,13 @@
     return getConstant(Val, DL, VT, true, isOpaque);
   }
 
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, EVT OpVT,
+                        const APInt &Step) {
+    assert(Step.getActiveBits() <= OpVT.getScalarSizeInBits());
+    SDValue StepVal = getConstant(Step.getZExtValue(), DL, OpVT);
+    return getNode(ISD::STEP_VECTOR, DL, ResVT, StepVal);
+  }
+
   /// Create a true or false constant of type \p VT using the target's
   /// BooleanContent for type \p OpVT.
   SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT);
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -854,6 +854,9 @@
   /// will be the same type as that of \p Scaling.
   Value *CreateVScale(Constant *Scaling, const Twine &Name = "");
 
+  /// Creates a vector of type \p DstType with the linear sequence <0, 1, ...>
+  Value *CreateStepVector(Type *DstType, const Twine &Name = "");
+
   /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
   /// type.
   CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1331,6 +1331,9 @@
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                            [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
 
+def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                        [], [IntrNoMem]>;
+
 //===---------------- Vector Predication Intrinsics --------------===//
 
 // Speculatable Binary operators
Index: llvm/include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- llvm/include/llvm/Target/TargetSelectionDAG.td
+++ llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -657,6 +657,8 @@
 def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
 def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
 def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
+def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
+                       [SDTCisVec<0>, SDTCisInt<1>]>, []>;
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                               []>;
 
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -108,6 +108,7 @@
                          Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SPLAT_VECTOR:
                          Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+  case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
   case ISD::CONCAT_VECTORS:
                          Res = PromoteIntRes_CONCAT_VECTORS(N); break;
 
@@ -4763,6 +4764,18 @@
   return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+  // Rebuild the STEP_VECTOR in the promoted vector type, with the step constant
+  // typed as whatever the promoted element type is transformed to.
+  LLVMContext &Ctx = *DAG.getContext();
+  EVT PromotedVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
+  assert(PromotedVT.isVector() && "Type must be promoted to a vector type");
+  EVT StepVT = TLI.getTypeToTransformTo(Ctx, PromotedVT.getVectorElementType());
+  const APInt &Step = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+
+  return DAG.getStepVector(SDLoc(N), PromotedVT, StepVT, Step);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
   SDLoc dl(N);
 
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -303,6 +303,7 @@
   SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+  SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
   SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
   SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -834,6 +835,7 @@
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -917,6 +917,9 @@
   case ISD::SCALAR_TO_VECTOR:
     SplitVecRes_ScalarOp(N, Lo, Hi);
     break;
+  case ISD::STEP_VECTOR:
+    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+    break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -1619,6 +1622,30 @@
     Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  // Lo is a STEP_VECTOR of the low half type; Hi is a STEP_VECTOR of the high
+  // half type plus a splat of the offset at which the high half starts.
+  EVT VT = N->getValueType(0);
+  assert(VT.isScalableVector() &&
+         "Only scalable vectors are supported for STEP_VECTOR");
+  SDLoc dl(N);
+  EVT LoVT, HiVT;
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
+  SDValue Step = N->getOperand(0);
+  EVT StepVT = Step.getValueType();
+  Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+  // Offset = EltCnt * Step, with EltCnt = vscale * Lo's min element count.
+  APInt MinElts(StepVT.getScalarSizeInBits(), LoVT.getVectorMinNumElements());
+  SDValue Offset = DAG.getNode(ISD::MUL, dl, StepVT,
+                               DAG.getVScale(dl, StepVT, MinElts), Step);
+  Offset = DAG.getZExtOrTrunc(Offset, dl, HiVT.getVectorElementType());
+  Offset = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, Offset);
+  Hi = DAG.getNode(ISD::ADD, dl, HiVT,
+                   DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step), Offset);
+}
+
 void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
   EVT LoVT, HiVT;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4270,6 +4270,15 @@
   return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
 }
 
+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+                               SelectionDAG &DAG) {
+  // A zero step gives an all-zero vector, so splat the (zero) Step operand
+  // itself; any other step is left unfolded, signalled by a null SDValue.
+  const bool IsZeroStep = cast<ConstantSDNode>(Step)->isNullValue();
+  return IsZeroStep ? DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, Step)
+                    : SDValue();
+}
+
 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                 ArrayRef<SDValue> Ops,
                                 SelectionDAG &DAG) {
@@ -4489,6 +4498,11 @@
                         APFloat::rmNearestTiesToEven, &Ignored);
       return getConstantFP(FPV, DL, VT);
     }
+    case ISD::STEP_VECTOR: {
+      if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+        return V;
+      break;
+    }
     }
   }
 
@@ -4598,6 +4612,15 @@
 
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
+  case ISD::STEP_VECTOR:
+    assert(VT.isScalableVector() &&
+           "STEP_VECTOR can only be used with scalable types");
+    assert(Operand.getValueType().bitsGE(VT.getScalarType()) &&
+           "Operand type should be at least as large as the element type");
+    assert(isa<ConstantSDNode>(Operand) &&
+           !cast<ConstantSDNode>(Operand)->getAPIntValue().isNegative() &&
+           "Expected positive integer constant for STEP_VECTOR");
+    break;
   case ISD::FREEZE:
     assert(VT == Operand.getValueType() && "Unexpected VT!");
     break;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -774,6 +774,7 @@
 
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
   void visitVectorReverse(const CallInst &I);
+  void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6872,7 +6872,9 @@
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return;
-
+  case Intrinsic::experimental_stepvector:
+    visitStepVector(I);
+    return;
   case Intrinsic::vector_reduce_fadd:
   case Intrinsic::vector_reduce_fmul:
   case Intrinsic::vector_reduce_add:
@@ -10838,6 +10840,25 @@
   }
 }
 
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  SDLoc DL = getCurSDLoc();
+  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  assert(ResultVT.isVector() && ResultVT.isInteger() &&
+         "Only expect integer vector types used for experimental_stepvector");
+  EVT OpVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), ResultVT.getScalarType());
+  if (ResultVT.isScalableVector()) { // Emit a STEP_VECTOR node with step 1.
+    APInt One(OpVT.getScalarSizeInBits(), 1);
+    setValue(&I, DAG.getStepVector(DL, ResultVT, OpVT, One));
+    return;
+  }
+  SmallVector<SDValue, 16> Lanes; // Fixed width: constant <0, 1, 2, ...>.
+  for (uint64_t Idx = 0, E = ResultVT.getVectorMinNumElements(); Idx < E; ++Idx)
+    Lanes.push_back(DAG.getConstant(Idx, DL, OpVT));
+  setValue(&I, DAG.getBuildVector(ResultVT, DL, Lanes));
+}
+
 void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -290,6 +290,7 @@
   case ISD::VECTOR_SHUFFLE:             return "vector_shuffle";
   case ISD::SPLAT_VECTOR:               return "splat_vector";
   case ISD::VECTOR_REVERSE:             return "vector_reverse";
+  case ISD::STEP_VECTOR:                return "step_vector";
   case ISD::CARRY_FALSE:                return "carry_false";
   case ISD::ADDC:                       return "addc";
   case ISD::ADDE:                       return "adde";
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -91,6 +91,23 @@
              : CreateMul(CI, Scaling);
 }
 
+Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
+  // Anything that is not a fixed-width vector is handed to the stepvector
+  // intrinsic, overloaded on the destination type.
+  auto *FixedTy = dyn_cast<FixedVectorType>(DstType);
+  if (!FixedTy)
+    return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
+                           nullptr, Name);
+
+  // Fixed-width vectors become the constant <0, 1, ..., NumElts - 1>.
+  Type *EltTy = DstType->getScalarType();
+  const unsigned NumElts = FixedTy->getNumElements();
+  SmallVector<Constant *, 8> Lanes;
+  for (unsigned Idx = 0; Idx != NumElts; ++Idx)
+    Lanes.push_back(ConstantInt::get(EltTy, Idx));
+  return ConstantVector::get(Lanes);
+}
+
 CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
                                       MaybeAlign Align, bool isVolatile,
                                       MDNode *TBAATag, MDNode *ScopeTag,
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -5179,6 +5179,13 @@
 
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
+    Assert(VecTy && VecTy->getScalarType()->isIntegerTy(),
+           "experimental_stepvector only supported for integer vector types.",
+           &Call);
+    break;
+  }
   case Intrinsic::experimental_vector_insert: {
     VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
     VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -932,6 +932,7 @@
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                               bool OverrideNEON = false) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1125,6 +1125,7 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
 
       setOperationAction(ISD::MULHU, VT, Expand);
       setOperationAction(ISD::MULHS, VT, Expand);
@@ -1143,6 +1144,7 @@
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SETCC, VT, Custom);
       setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
       setOperationAction(ISD::TRUNCATE, VT, Custom);
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
@@ -4378,6 +4380,8 @@
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SPLAT_VECTOR:
     return LowerSPLAT_VECTOR(Op, DAG);
+  case ISD::STEP_VECTOR:
+    return LowerSTEP_VECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
     return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::INSERT_SUBVECTOR:
@@ -9021,6 +9025,37 @@
   return GenerateTBL(Op, ShuffleMask, DAG);
 }
 
+SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  assert(VT.isScalableVector() &&
+         "Only expect scalable vectors for STEP_VECTOR");
+  SDValue StepVal = Op.getOperand(0);
+
+  // i1 results are built as an integer index vector with the same number of
+  // elements and then truncated down to the predicate type.
+  if (VT.getScalarType() == MVT::i1) {
+    // Element width is the SVE block size divided by the minimum lane count,
+    // i.e. nxv4i32 for a nxv4i1 predicate, etc.
+    ElementCount EC = VT.getVectorElementCount();
+    unsigned IntElSize = AArch64::SVEBitsPerBlock / EC.getKnownMinValue();
+    MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(IntElSize), EC);
+
+    // The scalar start/step operands are i64 for 64-bit lanes, i32 otherwise.
+    MVT ScalarVT = IntElSize == 64 ? MVT::i64 : MVT::i32;
+    StepVal = DAG.getZExtOrTrunc(StepVal, dl, ScalarVT);
+    SDValue Zero = DAG.getConstant(0, dl, ScalarVT);
+    SDValue WideIndex =
+        DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, WideVT, Zero, StepVal);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, WideIndex);
+  }
+
+  // Every other element type maps straight onto INDEX_VECTOR(0, Step).
+  SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
+  return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
+}
+
 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
@@ -10177,6 +10212,11 @@
     SDValue Zero = DAG.getConstant(0, dl, OpVT);
     SDValue One = DAG.getConstant(1, dl, OpVT);
     SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
+    if (VT.isScalableVector()) {
+      SDValue Pg = getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
+      return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, dl, VT, Pg, And, Zero,
+                         DAG.getCondCode(ISD::SETNE));
+    }
     return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
   }
 
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -260,6 +260,20 @@
       return LT.first;
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    assert(isa<VectorType>(RetTy) &&
+           cast<VectorType>(RetTy)->getElementType()->isIntegerTy());
+    unsigned Cost = 1; // Cost of the `index' instruction
+    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // Legalisation of illegal vectors involves an `index' instruction plus
+    // (LT.first - 1) vector adds.
+    if (LT.first > 1) {
+      Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
+      Cost += (LT.first - 1) *
+              getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
+    }
+    return Cost;
+  }
   default:
     break;
   }
Index: llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
@@ -0,0 +1,34 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon  < %s | FileCheck %s
+
+; Check the cost of stepvector for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret void
+}
+
+; Check the cost of stepvector for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret void
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
Index: llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
@@ -0,0 +1,39 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve  < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check the cost of stepvector for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction:   %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret void
+}
+
+; Check the cost of stepvector for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction:   %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction:   %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret void
+}
+
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
Index: llvm/test/CodeGen/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -0,0 +1,174 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
+
+; LEGAL INTEGER TYPES
+
+define <2 x i64> @stepvector_v2i64() {
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  ret <2 x i64> %0
+}
+
+define <4 x i32> @stepvector_v4i32() {
+; CHECK-LABEL: stepvector_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  ret <4 x i32> %0
+}
+
+define <8 x i16> @stepvector_v8i16() {
+; CHECK-LABEL: stepvector_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  ret <8 x i16> %0
+}
+
+define <16 x i8> @stepvector_v16i8() {
+; CHECK-LABEL: stepvector_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret <16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <4 x i64> @stepvector_v4i64() {
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    adrp x9, .LCPI4_1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  ret <4 x i64> %0
+}
+
+define <16 x i32> @stepvector_v16i32() {
+; CHECK-LABEL: stepvector_v16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    adrp x9, .LCPI5_1
+; CHECK-NEXT:    adrp x10, .LCPI5_2
+; CHECK-NEXT:    adrp x11, .LCPI5_3
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI5_1]
+; CHECK-NEXT:    ldr q2, [x10, :lo12:.LCPI5_2]
+; CHECK-NEXT:    ldr q3, [x11, :lo12:.LCPI5_3]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret <16 x i32> %0
+}
+
+define <2 x i32> @stepvector_v2i32() {
+; CHECK-LABEL: stepvector_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
+  ret <2 x i32> %0
+}
+
+define <4 x i16> @stepvector_v4i16() {
+; CHECK-LABEL: stepvector_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
+  ret <4 x i16> %0
+}
+
+
+; LEGAL PREDICATE TYPES
+
+define <2 x i1> @stepvector_v2i1() {
+; CHECK-LABEL: stepvector_v2i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI8_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i1> @llvm.experimental.stepvector.v2i1()
+  ret <2 x i1> %0
+}
+
+define <4 x i1> @stepvector_v4i1() {
+; CHECK-LABEL: stepvector_v4i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2s, #1, lsl #16
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i1> @llvm.experimental.stepvector.v4i1()
+  ret <4 x i1> %0
+}
+
+define <8 x i1> @stepvector_v8i1() {
+; CHECK-LABEL: stepvector_v8i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.4h, #1, lsl #8
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <8 x i1> @llvm.experimental.stepvector.v8i1()
+  ret <8 x i1> %0
+}
+
+define <16 x i1> @stepvector_v16i1() {
+; CHECK-LABEL: stepvector_v16i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.8h, #1, lsl #8
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i1> @llvm.experimental.stepvector.v16i1()
+  ret <16 x i1> %0
+}
+
+define <32 x i8> @stepvector_v32i1() {
+; CHECK-LABEL: stepvector_v32i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0xff00ff00ff00ff00
+; CHECK-NEXT:    movi v1.2d, #0xff00ff00ff00ff00
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <32 x i1> @llvm.experimental.stepvector.v32i1()
+  %1 = sext <32 x i1> %0 to <32 x i8>
+  ret <32 x i8> %1
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+declare <2 x i1> @llvm.experimental.stepvector.v2i1()
+declare <4 x i1> @llvm.experimental.stepvector.v4i1()
+declare <8 x i1> @llvm.experimental.stepvector.v8i1()
+declare <16 x i1> @llvm.experimental.stepvector.v16i1()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
+declare <2 x i32> @llvm.experimental.stepvector.v2i32()
+declare <4 x i16> @llvm.experimental.stepvector.v4i16()
+declare <32 x i1> @llvm.experimental.stepvector.v32i1()
Index: llvm/test/CodeGen/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; LEGAL INTEGER TYPES
+
+define <vscale x 2 x i64> @stepvector_nxv2i64() {
+; CHECK-LABEL: stepvector_nxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64() ; expected output: a single index instruction (#0, #1) over 64-bit lanes
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @stepvector_nxv4i32() {
+; CHECK-LABEL: stepvector_nxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32() ; expected output: a single index instruction (#0, #1) over 32-bit lanes
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 8 x i16> @stepvector_nxv8i16() {
+; CHECK-LABEL: stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16() ; expected output: a single index instruction (#0, #1) over 16-bit lanes
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 16 x i8> @stepvector_nxv16i8() {
+; CHECK-LABEL: stepvector_nxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8() ; expected output: a single index instruction (#0, #1) over 8-bit lanes
+  ret <vscale x 16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <vscale x 4 x i64> @stepvector_nxv4i64() {
+; CHECK-LABEL: stepvector_nxv4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    add z1.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64() ; result occupies z0/z1: z0 is the index sequence, z1 = z0 + the cntd value broadcast by mov
+  ret <vscale x 4 x i64> %0
+}
+
+define <vscale x 16 x i32> @stepvector_nxv16i32() {
+; CHECK-LABEL: stepvector_nxv16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    add z1.s, z0.s, z1.s
+; CHECK-NEXT:    add z2.s, z0.s, z3.s
+; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32() ; result occupies z0-z3: z0 is the index sequence, z1 = z0 + cntw, z2 = z0 + cnth, z3 = z1 + cnth (counts broadcast by mov)
+  ret <vscale x 16 x i32> %0
+}
+
+define <vscale x 2 x i32> @stepvector_nxv2i32() {
+; CHECK-LABEL: stepvector_nxv2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32() ; expected output builds the sequence in 64-bit (.d) lanes (presumably the i32 elements are promoted - TODO confirm)
+  ret <vscale x 2 x i32> %0
+}
+
+define <vscale x 4 x i16> @stepvector_nxv4i16() {
+; CHECK-LABEL: stepvector_nxv4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16() ; expected output builds the sequence in 32-bit (.s) lanes (presumably the i16 elements are promoted - TODO confirm)
+  ret <vscale x 4 x i16> %0
+}
+
+define <vscale x 8 x i8> @stepvector_nxv8i8() {
+; CHECK-LABEL: stepvector_nxv8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8() ; expected output builds the sequence in 16-bit (.h) lanes (presumably the i8 elements are promoted - TODO confirm)
+  ret <vscale x 8 x i8> %0
+}
+
+; LEGAL PREDICATE TYPES
+
+define <vscale x 2 x i1> @stepvector_nxv2i1() {
+; CHECK-LABEL: stepvector_nxv2i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    and z0.d, z0.d, #0x1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i1> @llvm.experimental.stepvector.nxv2i1() ; predicate result in p0: index sequence masked with #0x1, then compared not-equal to 0 under an all-true predicate
+  ret <vscale x 2 x i1> %0
+}
+
+define <vscale x 4 x i1> @stepvector_nxv4i1() {
+; CHECK-LABEL: stepvector_nxv4i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    and z0.s, z0.s, #0x1
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i1> @llvm.experimental.stepvector.nxv4i1() ; predicate result in p0: index sequence masked with #0x1, then compared not-equal to 0 under an all-true predicate
+  ret <vscale x 4 x i1> %0
+}
+
+define <vscale x 8 x i1> @stepvector_nxv8i1() {
+; CHECK-LABEL: stepvector_nxv8i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    and z0.h, z0.h, #0x1
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i1> @llvm.experimental.stepvector.nxv8i1() ; predicate result in p0: index sequence masked with #0x1, then compared not-equal to 0 under an all-true predicate
+  ret <vscale x 8 x i1> %0
+}
+
+define <vscale x 16 x i1> @stepvector_nxv16i1() {
+; CHECK-LABEL: stepvector_nxv16i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    and z0.b, z0.b, #0x1
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, #0
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1() ; predicate result in p0: index sequence masked with #0x1, then compared not-equal to 0 under an all-true predicate
+  ret <vscale x 16 x i1> %0
+}
+
+
+; ILLEGAL PREDICATE TYPES
+
+define <vscale x 32 x i1> @stepvector_nxv32i1() {
+; CHECK-LABEL: stepvector_nxv32i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    ptrue p1.b
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    and z0.b, z0.b, #0x1
+; CHECK-NEXT:    whilelo p2.b, xzr, x8
+; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
+; CHECK-NEXT:    eor p1.b, p1/z, p0.b, p2.b
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 32 x i1> @llvm.experimental.stepvector.nxv32i1() ; result occupies p0/p1: p0 comes from index/and/cmpne, p1 = p0 eor a whilelo mask built from bit 0 of rdvl #1
+  ret <vscale x 32 x i1> %0
+}
+
+define <vscale x 64 x i1> @stepvector_nxv64i1() {
+; CHECK-LABEL: stepvector_nxv64i1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    addvl sp, sp, #-1
+; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    rdvl x8, #2
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    whilelo p4.b, xzr, x8
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    ptrue p3.b
+; CHECK-NEXT:    and z0.b, z0.b, #0x1
+; CHECK-NEXT:    sbfx x8, x8, #0, #1
+; CHECK-NEXT:    cmpne p0.b, p3/z, z0.b, #0
+; CHECK-NEXT:    whilelo p1.b, xzr, x8
+; CHECK-NEXT:    eor p1.b, p3/z, p0.b, p1.b
+; CHECK-NEXT:    eor p2.b, p3/z, p0.b, p4.b
+; CHECK-NEXT:    eor p3.b, p3/z, p1.b, p4.b
+; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT:    addvl sp, sp, #1
+; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 64 x i1> @llvm.experimental.stepvector.nxv64i1() ; result occupies p0-p3, each later part derived from p0 by eor with whilelo masks; p4 is a temporary that the expected output spills and reloads
+  ret <vscale x 64 x i1> %0
+}
+
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+declare <vscale x 2 x i1> @llvm.experimental.stepvector.nxv2i1()
+declare <vscale x 4 x i1> @llvm.experimental.stepvector.nxv4i1()
+declare <vscale x 8 x i1> @llvm.experimental.stepvector.nxv8i1()
+declare <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+declare <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+declare <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
+declare <vscale x 32 x i1> @llvm.experimental.stepvector.nxv32i1()
+declare <vscale x 64 x i1> @llvm.experimental.stepvector.nxv64i1()
Index: llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
===================================================================
--- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -648,4 +648,18 @@
   EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector");
 }
 
+TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {
+  // Bail out if TM is null (presumably no AArch64 target is available).
+  if (!TM)
+    return;
+
+  SDLoc DL;
+  EVT EltVT = EVT::getIntegerVT(Context, 8);
+  EVT ScalableVT = EVT::getVectorVT(Context, MVT::i8, 16, true);
+  // A STEP_VECTOR built on a zero constant should come back as SPLAT_VECTOR.
+  SDValue ZeroOp = DAG->getConstant(0, DL, EltVT);
+  SDValue Folded = DAG->getNode(ISD::STEP_VECTOR, DL, ScalableVT, ZeroOp);
+  EXPECT_EQ(Folded.getOpcode(), ISD::SPLAT_VECTOR);
+}
+
 } // end namespace llvm
Index: llvm/unittests/IR/IRBuilderTest.cpp
===================================================================
--- llvm/unittests/IR/IRBuilderTest.cpp
+++ llvm/unittests/IR/IRBuilderTest.cpp
@@ -180,6 +180,32 @@
     EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
 }
 
+TEST_F(IRBuilderTest, CreateStepVector) {
+  IRBuilder<> Builder(BB);
+
+  // Fixed-width: the result must be a constant vector whose lane i holds i.
+  Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, false);
+  Value *StepVec = Builder.CreateStepVector(DstVecTy);
+  // ASSERT (not EXPECT) so a wrong kind fails the test before cast<> runs.
+  ASSERT_TRUE(isa<Constant>(StepVec));
+  EXPECT_EQ(StepVec->getType(), DstVecTy);
+  const auto *VectorValue = cast<Constant>(StepVec);
+  for (unsigned i = 0; i < 4; i++) {
+    Constant *Lane = VectorValue->getAggregateElement(i);
+    ASSERT_TRUE(Lane && isa<ConstantInt>(Lane));
+    EXPECT_EQ(cast<ConstantInt>(Lane)->getValue(), i);
+  }
+
+  // Scalable: the result must be a call to llvm.experimental.stepvector.
+  DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+  StepVec = Builder.CreateStepVector(DstVecTy);
+  ASSERT_TRUE(isa<CallInst>(StepVec));
+  CallInst *Call = cast<CallInst>(StepVec);
+  FunctionType *FTy = Call->getFunctionType();
+  EXPECT_EQ(FTy->getReturnType(), DstVecTy);
+  EXPECT_EQ(Call->getIntrinsicID(), Intrinsic::experimental_stepvector);
+}
+
 TEST_F(IRBuilderTest, ConstrainedFP) {
   IRBuilder<> Builder(BB);
   Value *V;