Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -16596,6 +16596,36 @@
 the source/result vector. The ``imm`` is a signed integer constant in the range
 ``-VL <= imm < VL``. For values outside of this range the result is poison.
+
+'``llvm.experimental.stepvector``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This is an overloaded intrinsic. You can use ``llvm.experimental.stepvector``
+to generate a vector whose lane values comprise the linear sequence
+<0, 1, 2, ...>. It is primarily intended for scalable vectors.
+
+::
+
+      declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+      declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+
+The '``llvm.experimental.stepvector``' intrinsics are used to create vectors
+of integers whose elements contain a linear sequence of values starting from 0
+with a step of 1. This experimental intrinsic can only be used for vectors
+with integer elements that are at least 8 bits in size. If the sequence value
+exceeds the allowed limit for the element type then the result for that lane is
+undefined.
+
+These intrinsics work for both fixed and scalable vectors. While this intrinsic
+is marked as experimental, the recommended way to express this operation for
+fixed-width vectors is still to generate a constant vector instead.
+
+
+Arguments:
+""""""""""
+
+None.
+
+
 Matrix Intrinsics
 -----------------
Index: llvm/include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1249,6 +1249,12 @@
     return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                            VarMask, Alignment, CostKind, I);
   }
+  case Intrinsic::experimental_stepvector: {
+    if (isa<ScalableVectorType>(RetTy))
+      return BaseT::getIntrinsicInstrCost(ICA, CostKind);
+    // The cost of materialising a constant integer vector.
+    return TargetTransformInfo::TCC_Basic;
+  }
   case Intrinsic::experimental_vector_extract: {
     // FIXME: Handle case where a scalable vector is extracted from a scalable
     // vector
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -592,6 +592,14 @@
   /// scalars should have the same type.
   SPLAT_VECTOR_PARTS,
 
+  /// STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised
+  /// of a linear sequence of unsigned values starting from 0 with a step of
+  /// IMM, where IMM must be a constant positive integer value. The operation
+  /// does not support returning fixed-width vectors or non-constant operands.
+  /// If the sequence value exceeds the limit allowed for the element type then
+  /// the values for those lanes are undefined.
+  STEP_VECTOR,
+
   /// MULHU/MULHS - Multiply high - Multiply two integers of type iN,
   /// producing an unsigned/signed value of type i[2*N], then return the top
   /// part.
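For reference, a minimal IR sketch of the documented behaviour for a fixed-width result, where the call is simply equivalent to a constant step vector (illustrative only, not part of the diff; the function name @fixed_width_example is hypothetical):

; Illustrative only: %step is equivalent to the constant <i32 0, i32 1, i32 2, i32 3>,
; which remains the recommended form for fixed-width vectors.
define <4 x i32> @fixed_width_example() {
  %step = call <4 x i32> @llvm.experimental.stepvector.v4i32()
  ret <4 x i32> %step
}

declare <4 x i32> @llvm.experimental.stepvector.v4i32()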
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -833,6 +833,10 @@
     return getNode(ISD::SPLAT_VECTOR, DL, VT, Op);
   }
 
+  /// Returns a vector of type ResVT whose elements contain the linear sequence
+  /// <0, Step, Step * 2, Step * 3, ...>
+  SDValue getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step);
+
   /// Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
   /// the shuffle node in input but with swapped operands.
   ///
Index: llvm/include/llvm/IR/IRBuilder.h
===================================================================
--- llvm/include/llvm/IR/IRBuilder.h
+++ llvm/include/llvm/IR/IRBuilder.h
@@ -854,6 +854,9 @@
   /// will be the same type as that of \p Scaling.
   Value *CreateVScale(Constant *Scaling, const Twine &Name = "");
 
+  /// Creates a vector of type \p DstType with the linear sequence <0, 1, ...>
+  Value *CreateStepVector(Type *DstType, const Twine &Name = "");
+
   /// Create a call to intrinsic \p ID with 1 operand which is mangled on its
   /// type.
   CallInst *CreateUnaryIntrinsic(Intrinsic::ID ID, Value *V,
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1336,6 +1336,9 @@
 def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_anyint_ty],
                                        [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
 
+def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+                                                        [], [IntrNoMem]>;
+
 //===---------------- Vector Predication Intrinsics --------------===//
 
 // Speculatable Binary operators
Index: llvm/include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- llvm/include/llvm/Target/TargetSelectionDAG.td
+++ llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -661,6 +661,8 @@
 def vector_splice : SDNode<"ISD::VECTOR_SPLICE", SDTVecSlice, []>;
 def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
 def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
+def step_vector : SDNode<"ISD::STEP_VECTOR", SDTypeProfile<1, 1,
+                         [SDTCisVec<0>, SDTCisInt<1>]>, []>;
 def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
                               []>;
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -110,6 +110,7 @@
                          Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SPLAT_VECTOR:
                          Res = PromoteIntRes_SPLAT_VECTOR(N); break;
+  case ISD::STEP_VECTOR: Res = PromoteIntRes_STEP_VECTOR(N); break;
   case ISD::CONCAT_VECTORS:
                          Res = PromoteIntRes_CONCAT_VECTORS(N); break;
 
@@ -4782,6 +4783,18 @@
   return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) {
+  SDLoc dl(N);
+  EVT OutVT = N->getValueType(0);
+  EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+  assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+  EVT NOutElemVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                            NOutVT.getVectorElementType());
+  APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue();
+  SDValue Step = DAG.getConstant(StepVal.getZExtValue(), dl, NOutElemVT);
+  return DAG.getStepVector(dl, NOutVT, Step);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
   SDLoc dl(N);
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -304,6 +304,7 @@
   SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
   SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
+  SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
   SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
   SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -836,6 +837,7 @@
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -928,6 +928,9 @@
   case ISD::SCALAR_TO_VECTOR:
     SplitVecRes_ScalarOp(N, Lo, Hi);
     break;
+  case ISD::STEP_VECTOR:
+    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
+    break;
   case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
@@ -1639,6 +1642,30 @@
   Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
 }
 
+void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  EVT LoVT, HiVT;
+  SDLoc dl(N);
+  assert(N->getValueType(0).isScalableVector() &&
+         "Only scalable vectors are supported for STEP_VECTOR");
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  SDValue Step = N->getOperand(0);
+
+  Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
+
+  // Hi = Lo + (EltCnt * Step)
+  EVT EltVT = Step.getValueType();
+  SDValue StartOfHi =
+      DAG.getVScale(dl, EltVT,
+                    cast<ConstantSDNode>(Step)->getAPIntValue() *
+                        LoVT.getVectorMinNumElements());
+  StartOfHi = DAG.getZExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
+  StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
+
+  Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
+  Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
+}
+
 void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
   EVT LoVT, HiVT;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1744,6 +1744,18 @@
   return SDValue(CondCodeNodes[Cond], 0);
 }
 
+SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT, SDValue Step) {
+  if (ResVT.isScalableVector())
+    return getNode(ISD::STEP_VECTOR, DL, ResVT, Step);
+
+  EVT OpVT = Step.getValueType();
+  APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+  SmallVector<SDValue, 16> OpsStepConstants;
+  for (uint64_t i = 0; i < ResVT.getVectorNumElements(); i++)
+    OpsStepConstants.push_back(getConstant(StepVal * i, DL, OpVT));
+  return getBuildVector(ResVT, DL, OpsStepConstants);
+}
+
 /// Swaps the values of N1 and N2. Swaps all indices in the shuffle mask M that
 /// point at N1 to point at N2 and indices that point at N2 to point at N1.
 static void commuteShuffle(SDValue &N1, SDValue &N2, MutableArrayRef<int> M) {
@@ -4306,6 +4318,14 @@
   return (computeKnownBits(A).Zero | computeKnownBits(B).Zero).isAllOnesValue();
 }
 
+static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step,
+                               SelectionDAG &DAG) {
+  if (cast<ConstantSDNode>(Step)->isNullValue())
+    return DAG.getConstant(0, DL, VT);
+
+  return SDValue();
+}
+
 static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
                                 ArrayRef<SDValue> Ops,
                                 SelectionDAG &DAG) {
@@ -4527,6 +4547,11 @@
                   APFloat::rmNearestTiesToEven, &Ignored);
       return getConstantFP(FPV, DL, VT);
     }
+    case ISD::STEP_VECTOR: {
+      if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+        return V;
+      break;
+    }
     }
   }
 
@@ -4636,6 +4661,18 @@
 
   unsigned OpOpcode = Operand.getNode()->getOpcode();
   switch (Opcode) {
+  case ISD::STEP_VECTOR:
+    assert(VT.isScalableVector() &&
+           "STEP_VECTOR can only be used with scalable types");
+    assert(VT.getScalarSizeInBits() >= 8 &&
+           "STEP_VECTOR can only be used with vectors of integers that are at "
+           "least 8 bits wide");
+    assert(Operand.getValueType().bitsGE(VT.getScalarType()) &&
+           "Operand type should be at least as large as the element type");
+    assert(isa<ConstantSDNode>(Operand) &&
+           cast<ConstantSDNode>(Operand)->getAPIntValue().isNonNegative() &&
+           "Expected positive integer constant for STEP_VECTOR");
+    break;
   case ISD::FREEZE:
     assert(VT == Operand.getValueType() && "Unexpected VT!");
     break;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -779,6 +779,7 @@
   void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
   void visitVectorReverse(const CallInst &I);
   void visitVectorSplice(const CallInst &I);
+  void visitStepVector(const CallInst &I);
 
   void visitUserOp1(const Instruction &I) {
     llvm_unreachable("UserOp1 should not exist at instruction selection time!");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6945,7 +6945,9 @@
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return;
-
+  case Intrinsic::experimental_stepvector:
+    visitStepVector(I);
+    return;
   case Intrinsic::vector_reduce_fadd:
   case Intrinsic::vector_reduce_fmul:
   case Intrinsic::vector_reduce_add:
@@ -10929,6 +10931,16 @@
   }
 }
 
+void SelectionDAGBuilder::visitStepVector(const CallInst &I) {
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  auto DL = getCurSDLoc();
+  EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+  EVT OpVT =
+      TLI.getTypeToTransformTo(*DAG.getContext(), ResultVT.getScalarType());
+  SDValue Step = DAG.getConstant(1, DL, OpVT);
+  setValue(&I, DAG.getStepVector(DL, ResultVT, Step));
+}
+
 void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -292,6 +292,7 @@
   case ISD::SPLAT_VECTOR:               return "splat_vector";
   case ISD::SPLAT_VECTOR_PARTS:         return "splat_vector_parts";
   case ISD::VECTOR_REVERSE:             return "vector_reverse";
+  case ISD::STEP_VECTOR:                return "step_vector";
   case ISD::CARRY_FALSE:                return "carry_false";
   case ISD::ADDC:                       return "addc";
   case ISD::ADDE:                       return "adde";
Index: llvm/lib/IR/IRBuilder.cpp
===================================================================
--- llvm/lib/IR/IRBuilder.cpp
+++ llvm/lib/IR/IRBuilder.cpp
@@ -91,6 +91,23 @@
              : CreateMul(CI, Scaling);
 }
 
+Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
+  if (isa<ScalableVectorType>(DstType))
+    return CreateIntrinsic(Intrinsic::experimental_stepvector, {DstType}, {},
+                           nullptr, Name);
+
+  Type *STy = DstType->getScalarType();
+  unsigned NumEls = cast<FixedVectorType>(DstType)->getNumElements();
+
+  // Create a vector of consecutive numbers from zero to VF.
+  SmallVector<Constant *, 8> Indices;
+  for (unsigned i = 0; i < NumEls; ++i)
+    Indices.push_back(ConstantInt::get(STy, i));
+
+  // Add the consecutive indices to the vector value.
+  return ConstantVector::get(Indices);
+}
+
 CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
                                       MaybeAlign Align, bool isVolatile,
                                       MDNode *TBAATag, MDNode *ScopeTag,
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -5185,6 +5185,15 @@
 
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    VectorType *VecTy = dyn_cast<VectorType>(Call.getType());
+    Assert(VecTy && VecTy->getScalarType()->isIntegerTy() &&
+               VecTy->getScalarSizeInBits() >= 8,
+           "experimental_stepvector only supported for vectors of integers "
+           "with a bitwidth of at least 8.",
+           &Call);
+    break;
+  }
   case Intrinsic::experimental_vector_insert: {
     VectorType *VecTy = cast<VectorType>(Call.getArgOperand(0)->getType());
     VectorType *SubVecTy = cast<VectorType>(Call.getArgOperand(1)->getType());
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -936,6 +936,7 @@
   SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG, unsigned NewOp,
                               bool OverrideNEON = false) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1127,6 +1127,7 @@
       setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
       setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
 
       setOperationAction(ISD::MULHU, VT, Expand);
       setOperationAction(ISD::MULHS, VT, Expand);
@@ -4392,6 +4393,8 @@
     return LowerVECTOR_SHUFFLE(Op, DAG);
   case ISD::SPLAT_VECTOR:
     return LowerSPLAT_VECTOR(Op, DAG);
+  case ISD::STEP_VECTOR:
+    return LowerSTEP_VECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
     return LowerEXTRACT_SUBVECTOR(Op, DAG);
   case ISD::INSERT_SUBVECTOR:
@@ -9039,6 +9042,21 @@
   return GenerateTBL(Op, ShuffleMask, DAG);
 }
 
+SDValue AArch64TargetLowering::LowerSTEP_VECTOR(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  assert(VT.isScalableVector() &&
+         "Only expect scalable vectors for STEP_VECTOR");
+  EVT ElemVT = VT.getScalarType();
+  assert(ElemVT != MVT::i1 &&
+         "Vectors of i1 types not supported for STEP_VECTOR");
+
+  SDValue StepVal = Op.getOperand(0);
+  SDValue Zero = DAG.getConstant(0, dl, StepVal.getValueType());
+  return DAG.getNode(AArch64ISD::INDEX_VECTOR, dl, VT, Zero, StepVal);
+}
+
 SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
                                                  SelectionDAG &DAG) const {
   SDLoc dl(Op);
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -260,6 +260,19 @@
       return LT.first;
     break;
   }
+  case Intrinsic::experimental_stepvector: {
+    unsigned Cost = 1; // Cost of the `index' instruction
+    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
+    // Legalisation of illegal vectors involves an `index' instruction plus
+    // (LT.first - 1) vector adds.
+    if (LT.first > 1) {
+      Type *LegalVTy = EVT(LT.second).getTypeForEVT(RetTy->getContext());
+      unsigned AddCost =
+          getArithmeticInstrCost(Instruction::Add, LegalVTy, CostKind);
+      Cost += AddCost * (LT.first - 1);
+    }
+    return Cost;
+  }
   default:
     break;
   }
Index: llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/neon-stepvector.ll
@@ -0,0 +1,34 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s
+
+; Check costs for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  %1 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  %2 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  %3 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  %4 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret void
+}
+
+; Check cost for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  %1 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  %2 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret void
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
Index: llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/CostModel/AArch64/sve-stepvector.ll
@@ -0,0 +1,39 @@
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; Check costs for legal integer vectors.
+define void @stepvector_legal_int() {
+; CHECK-LABEL: 'stepvector_legal_int'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  %1 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  %2 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  %3 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  %4 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret void
+}
+
+; Check cost for an illegal integer vector.
+define void @stepvector_illegal_int() {
+; CHECK-LABEL: 'stepvector_illegal_int'
+; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  %1 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  %2 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret void
+}
+
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
Index: llvm/test/CodeGen/AArch64/neon-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
+
+; LEGAL INTEGER TYPES
+
+define <2 x i64> @stepvector_v2i64() {
+; CHECK-LABEL: .LCPI0_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: stepvector_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI0_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i64> @llvm.experimental.stepvector.v2i64()
+  ret <2 x i64> %0
+}
+
+define <4 x i32> @stepvector_v4i32() {
+; CHECK-LABEL: .LCPI1_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: stepvector_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI1_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i32> @llvm.experimental.stepvector.v4i32()
+  ret <4 x i32> %0
+}
+
+define <8 x i16> @stepvector_v8i16() {
+; CHECK-LABEL: .LCPI2_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-NEXT:    .hword 4
+; CHECK-NEXT:    .hword 5
+; CHECK-NEXT:    .hword 6
+; CHECK-NEXT:    .hword 7
+; CHECK-LABEL: stepvector_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI2_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <8 x i16> @llvm.experimental.stepvector.v8i16()
+  ret <8 x i16> %0
+}
+
+define <16 x i8> @stepvector_v16i8() {
+; CHECK-LABEL: .LCPI3_0:
+; CHECK-NEXT:    .byte 0
+; CHECK-NEXT:    .byte 1
+; CHECK-NEXT:    .byte 2
+; CHECK-NEXT:    .byte 3
+; CHECK-NEXT:    .byte 4
+; CHECK-NEXT:    .byte 5
+; CHECK-NEXT:    .byte 6
+; CHECK-NEXT:    .byte 7
+; CHECK-NEXT:    .byte 8
+; CHECK-NEXT:    .byte 9
+; CHECK-NEXT:    .byte 10
+; CHECK-NEXT:    .byte 11
+; CHECK-NEXT:    .byte 12
+; CHECK-NEXT:    .byte 13
+; CHECK-NEXT:    .byte 14
+; CHECK-NEXT:    .byte 15
+; CHECK-LABEL: stepvector_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI3_0
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i8> @llvm.experimental.stepvector.v16i8()
+  ret <16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <4 x i64> @stepvector_v4i64() {
+; CHECK-LABEL: .LCPI4_0:
+; CHECK-NEXT:    .xword 0
+; CHECK-NEXT:    .xword 1
+; CHECK-LABEL: .LCPI4_1:
+; CHECK-NEXT:    .xword 2
+; CHECK-NEXT:    .xword 3
+; CHECK-LABEL: stepvector_v4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    adrp x9, .LCPI4_1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI4_1]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i64> @llvm.experimental.stepvector.v4i64()
+  ret <4 x i64> %0
+}
+
+define <16 x i32> @stepvector_v16i32() {
+; CHECK-LABEL: .LCPI5_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-NEXT:    .word 2
+; CHECK-NEXT:    .word 3
+; CHECK-LABEL: .LCPI5_1:
+; CHECK-NEXT:    .word 4
+; CHECK-NEXT:    .word 5
+; CHECK-NEXT:    .word 6
+; CHECK-NEXT:    .word 7
+; CHECK-LABEL: .LCPI5_2:
+; CHECK-NEXT:    .word 8
+; CHECK-NEXT:    .word 9
+; CHECK-NEXT:    .word 10
+; CHECK-NEXT:    .word 11
+; CHECK-LABEL: .LCPI5_3:
+; CHECK-NEXT:    .word 12
+; CHECK-NEXT:    .word 13
+; CHECK-NEXT:    .word 14
+; CHECK-NEXT:    .word 15
+; CHECK-LABEL: stepvector_v16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI5_0
+; CHECK-NEXT:    adrp x9, .LCPI5_1
+; CHECK-NEXT:    adrp x10, .LCPI5_2
+; CHECK-NEXT:    adrp x11, .LCPI5_3
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI5_1]
+; CHECK-NEXT:    ldr q2, [x10, :lo12:.LCPI5_2]
+; CHECK-NEXT:    ldr q3, [x11, :lo12:.LCPI5_3]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <16 x i32> @llvm.experimental.stepvector.v16i32()
+  ret <16 x i32> %0
+}
+
+define <2 x i32> @stepvector_v2i32() {
+; CHECK-LABEL: .LCPI6_0:
+; CHECK-NEXT:    .word 0
+; CHECK-NEXT:    .word 1
+; CHECK-LABEL: stepvector_v2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI6_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <2 x i32> @llvm.experimental.stepvector.v2i32()
+  ret <2 x i32> %0
+}
+
+define <4 x i16> @stepvector_v4i16() {
+; CHECK-LABEL: .LCPI7_0:
+; CHECK-NEXT:    .hword 0
+; CHECK-NEXT:    .hword 1
+; CHECK-NEXT:    .hword 2
+; CHECK-NEXT:    .hword 3
+; CHECK-LABEL: stepvector_v4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    ldr d0, [x8, :lo12:.LCPI7_0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <4 x i16> @llvm.experimental.stepvector.v4i16()
+  ret <4 x i16> %0
+}
+
+
+declare <2 x i64> @llvm.experimental.stepvector.v2i64()
+declare <4 x i32> @llvm.experimental.stepvector.v4i32()
+declare <8 x i16> @llvm.experimental.stepvector.v8i16()
+declare <16 x i8> @llvm.experimental.stepvector.v16i8()
+
+declare <4 x i64> @llvm.experimental.stepvector.v4i64()
+declare <16 x i32> @llvm.experimental.stepvector.v16i32()
+declare <2 x i32> @llvm.experimental.stepvector.v2i32()
+declare <4 x i16> @llvm.experimental.stepvector.v4i16()
Index: llvm/test/CodeGen/AArch64/sve-stepvector.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-stepvector.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s --check-prefixes=CHECK
+; RUN: FileCheck --check-prefix=WARN --allow-empty %s < %t
+
+; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
+; WARN-NOT: warning
+
+; LEGAL INTEGER TYPES
+
+define <vscale x 2 x i64> @stepvector_nxv2i64() {
+; CHECK-LABEL: stepvector_nxv2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+  ret <vscale x 2 x i64> %0
+}
+
+define <vscale x 4 x i32> @stepvector_nxv4i32() {
+; CHECK-LABEL: stepvector_nxv4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+  ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 8 x i16> @stepvector_nxv8i16() {
+; CHECK-LABEL: stepvector_nxv8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+  ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 16 x i8> @stepvector_nxv16i8() {
+; CHECK-LABEL: stepvector_nxv16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.b, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+  ret <vscale x 16 x i8> %0
+}
+
+; ILLEGAL INTEGER TYPES
+
+define <vscale x 4 x i64> @stepvector_nxv4i64() {
+; CHECK-LABEL: stepvector_nxv4i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntd x8
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    add z1.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+  ret <vscale x 4 x i64> %0
+}
+
+define <vscale x 16 x i32> @stepvector_nxv16i32() {
+; CHECK-LABEL: stepvector_nxv16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cntw x9
+; CHECK-NEXT:    cnth x8
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    mov z1.s, w9
+; CHECK-NEXT:    mov z3.s, w8
+; CHECK-NEXT:    add z1.s, z0.s, z1.s
+; CHECK-NEXT:    add z2.s, z0.s, z3.s
+; CHECK-NEXT:    add z3.s, z1.s, z3.s
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+  ret <vscale x 16 x i32> %0
+}
+
+define <vscale x 2 x i32> @stepvector_nxv2i32() {
+; CHECK-LABEL: stepvector_nxv2i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.d, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+  ret <vscale x 2 x i32> %0
+}
+
+define <vscale x 4 x i16> @stepvector_nxv4i16() {
+; CHECK-LABEL: stepvector_nxv4i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.s, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
+  ret <vscale x 4 x i16> %0
+}
+
+define <vscale x 8 x i8> @stepvector_nxv8i8() {
+; CHECK-LABEL: stepvector_nxv8i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    index z0.h, #0, #1
+; CHECK-NEXT:    ret
+entry:
+  %0 = call <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+  ret <vscale x 8 x i8> %0
+}
+
+declare <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
+declare <vscale x 8 x i16> @llvm.experimental.stepvector.nxv8i16()
+declare <vscale x 16 x i8> @llvm.experimental.stepvector.nxv16i8()
+
+declare <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+declare <vscale x 16 x i32> @llvm.experimental.stepvector.nxv16i32()
+declare <vscale x 2 x i32> @llvm.experimental.stepvector.nxv2i32()
+declare <vscale x 8 x i8> @llvm.experimental.stepvector.nxv8i8()
+declare <vscale x 4 x i16> @llvm.experimental.stepvector.nxv4i16()
Index: llvm/test/Verifier/stepvector-intrinsic.ll
===================================================================
--- /dev/null
+++ llvm/test/Verifier/stepvector-intrinsic.ll
@@ -0,0 +1,29 @@
+; RUN: not opt -S -verify < %s 2>&1 | FileCheck %s
+
+; Reject stepvector intrinsics that return a scalar
+
+define i32 @stepvector_i32() {
+; CHECK: Intrinsic has incorrect return type!
+  %1 = call i32 @llvm.experimental.stepvector.i32()
+  ret i32 %1
+}
+
+; Reject vectors with non-integer elements
+
+define <vscale x 4 x float> @stepvector_float() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+  ret <vscale x 4 x float> %1
+}
+
+; Reject vectors of integers less than 8 bits in width
+
+define <vscale x 16 x i1> @stepvector_i1() {
+; CHECK: experimental_stepvector only supported for vectors of integers with a bitwidth of at least 8
+  %1 = call <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
+  ret <vscale x 16 x i1> %1
+}
+
+declare i32 @llvm.experimental.stepvector.i32()
+declare <vscale x 4 x float> @llvm.experimental.stepvector.nxv4f32()
+declare <vscale x 16 x i1> @llvm.experimental.stepvector.nxv16i1()
Index: llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
===================================================================
--- llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
+++ llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp
@@ -648,4 +648,18 @@
   EXPECT_DEATH(getTypeAction(FromVT), "Cannot legalize this vector");
 }
 
+TEST_F(AArch64SelectionDAGTest, TestFold_STEP_VECTOR) {
+  if (!TM)
+    return;
+
+  SDLoc Loc;
+  auto IntVT = EVT::getIntegerVT(Context, 8);
+  auto VecVT = EVT::getVectorVT(Context, MVT::i8, 16, true);
+
+  // Should create SPLAT_VECTOR
+  SDValue Zero = DAG->getConstant(0, Loc, IntVT);
+  SDValue Op = DAG->getNode(ISD::STEP_VECTOR, Loc, VecVT, Zero);
+  EXPECT_EQ(Op.getOpcode(), ISD::SPLAT_VECTOR);
+}
+
 } // end namespace llvm
Index: llvm/unittests/IR/IRBuilderTest.cpp
===================================================================
--- llvm/unittests/IR/IRBuilderTest.cpp
+++ llvm/unittests/IR/IRBuilderTest.cpp
@@ -180,6 +180,32 @@
     EXPECT_EQ(FTy->getParamType(i), ArgTys[i]->getType());
 }
 
+TEST_F(IRBuilderTest, CreateStepVector) {
+  IRBuilder<> Builder(BB);
+
+  // Fixed width vectors
+  Type *DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, false);
+  Value *StepVec = Builder.CreateStepVector(DstVecTy);
+  EXPECT_TRUE(isa<Constant>(StepVec));
+  EXPECT_EQ(StepVec->getType(), DstVecTy);
+
+  const auto *VectorValue = cast<Constant>(StepVec);
+  for (unsigned i = 0; i < 4; i++) {
+    EXPECT_TRUE(isa<ConstantInt>(VectorValue->getAggregateElement(i)));
+    ConstantInt *El = cast<ConstantInt>(VectorValue->getAggregateElement(i));
+    EXPECT_EQ(El->getValue(), i);
+  }
+
+  // Scalable vectors
+  DstVecTy = VectorType::get(Builder.getInt32Ty(), 4, true);
+  StepVec = Builder.CreateStepVector(DstVecTy);
+  EXPECT_TRUE(isa<CallInst>(StepVec));
+  CallInst *Call = cast<CallInst>(StepVec);
+  FunctionType *FTy = Call->getFunctionType();
+  EXPECT_EQ(FTy->getReturnType(), DstVecTy);
+  EXPECT_EQ(Call->getIntrinsicID(), Intrinsic::experimental_stepvector);
+}
+
 TEST_F(IRBuilderTest, ConstrainedFP) {
   IRBuilder<> Builder(BB);
   Value *V;
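For context, a short IR sketch of how a transform (for example a vectorizer) might combine the new intrinsic with a splat to form per-lane indices base + <0, 1, 2, ...> (illustrative only, not part of the diff; the function and value names are hypothetical):

; Illustrative only: per-lane indices for a scalable vector, using the
; standard insertelement/shufflevector splat idiom for the base value.
define <vscale x 4 x i32> @lane_indices(i32 %base) {
  %step = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
  %ins = insertelement <vscale x 4 x i32> undef, i32 %base, i32 0
  %splat = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  %indices = add <vscale x 4 x i32> %splat, %step
  ret <vscale x 4 x i32> %indices
}

declare <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()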