Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -17722,6 +17722,37 @@
 mask argument does not match the pointer size of the target, the mask is
 zero-extended or truncated accordingly.
 
+.. _int_vscale:
+
+'``llvm.vscale``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare i32 @llvm.vscale(i32 %scaling) readnone
+
+Arguments:
+""""""""""
+
+The argument is an i32 immediate.
+
+Overview:
+"""""""""
+
+The ``llvm.vscale`` intrinsic returns the value of ``vscale`` used by scalable
+vectors such as ``<vscale x 4 x i32>``.
+
+Semantics:
+""""""""""
+
+``vscale`` is a positive value that is constant throughout the program
+but is unknown at compile time. The ``scaling`` immediate is provided for
+convenience; it is multiplied with ``vscale`` at runtime, so the intrinsic
+returns ``%scaling * vscale``.
+
+
 Stack Map Intrinsics
 --------------------
 
@@ -17735,6 +17766,7 @@
 These intrinsics are similar to the standard library memory intrinsics except
 that they perform memory transfer as a sequence of atomic memory accesses.
 
+
 .. _int_memcpy_element_unordered_atomic:
 
 '``llvm.memcpy.element.unordered.atomic``' Intrinsic
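A minimal usage sketch (illustrative only, not part of the patch): given the
semantics above, the runtime element count of a <vscale x 4 x i32> value can
be obtained by passing its known-minimum element count as the scaling
immediate.

  define i32 @num_elements_nxv4i32() {
    ; Returns 4 * vscale, the number of i32 elements in <vscale x 4 x i32>.
    %n = call i32 @llvm.vscale(i32 4)
    ret i32 %n
  }

  declare i32 @llvm.vscale(i32)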
Index: llvm/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -915,6 +915,12 @@
   /// known nonzero constant. The only operand here is the chain.
   GET_DYNAMIC_AREA_OFFSET,
 
+  /// VSCALE(IMM) - Returns the runtime scaling factor used to calculate the
+  /// number of elements within a scalable vector. IMM is a constant integer
+  /// multiplier that is applied to the runtime value and is usually some
+  /// multiple of MVT.getVectorNumElements().
+  VSCALE,
+
   /// Generic reduction nodes. These nodes represent horizontal vector
   /// reduction operations, producing a scalar result.
   /// The STRICT variants perform reductions in sequential order. The first
Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -912,6 +912,11 @@
     return getNode(ISD::UNDEF, SDLoc(), VT);
   }
 
+  /// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
+  SDValue getVScale(const SDLoc &DL, EVT VT, int64_t MulImm = 1) {
+    return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+  }
+
   /// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
   SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
     return getNode(ISD::GLOBAL_OFFSET_TABLE, SDLoc(), VT);
Index: llvm/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1331,6 +1331,11 @@
                              [IntrNoMem, ImmArg<1>, ImmArg<2>]>;
 
+//===---------- Intrinsics to query properties of scalable vectors --------===//
+def int_vscale : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, ImmArg<0>]>;
+
+//===----------------------------------------------------------------------===//
+
 //===----------------------------------------------------------------------===//
 // Target-specific intrinsics
 //===----------------------------------------------------------------------===//
Index: llvm/include/llvm/IR/PatternMatch.h
===================================================================
--- llvm/include/llvm/IR/PatternMatch.h
+++ llvm/include/llvm/IR/PatternMatch.h
@@ -32,6 +32,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
@@ -2002,6 +2003,49 @@
   return ExtractValue_match<Ind>(V);
 }
 
+/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale`
+/// or the constant expression
+///  `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 %scaling)`
+/// under the right conditions determined by DataLayout.
+struct VScaleVal_match {
+private:
+  template <typename Base, typename Offset>
+  inline BinaryOp_match<Base, Offset, Instruction::GetElementPtr>
+  m_OffsetGep(const Base &B, const Offset &O) {
+    return BinaryOp_match<Base, Offset, Instruction::GetElementPtr>(B, O);
+  }
+
+public:
+  const DataLayout &DL;
+  int &Val;
+  VScaleVal_match(const DataLayout &DL, int &S) : DL(DL), Val(S) {}
+
+  template <typename ITy> bool match(ITy *V) {
+    uint64_t S;
+
+    if (m_Intrinsic<Intrinsic::vscale>(m_ConstantInt(S)).match(V)) {
+      Val = (int)S;
+      return true;
+    }
+
+    if (m_PtrToInt(m_OffsetGep(m_Zero(), m_ConstantInt(S))).match(V)) {
+      Type *PtrTy = cast<Operator>(V)->getOperand(0)->getType();
+      Type *DerefTy = PtrTy->getPointerElementType();
+      if (DerefTy->isVectorTy() && DerefTy->getVectorIsScalable() &&
+          DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8) {
+        Val = (int)S;
+        return true;
+      }
+    }
+
+    return false;
+  }
+};
+
+inline VScaleVal_match m_VScale(const DataLayout &DL, int &S) {
+  return VScaleVal_match(DL, S);
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
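For orientation, a sketch of the two IR forms the matcher above is intended to
recognize (illustrative only; the function names are invented). The
constant-expression form is only accepted when DataLayout gives the
<vscale x 1 x i8> type a known minimum allocation size of 8 bits, in which
case the gep index acts as the scaling factor.

  ; Form 1: a direct intrinsic call, scaling = 4.
  define i32 @vscale_x4_intrinsic() {
    %v = call i32 @llvm.vscale(i32 4)
    ret i32 %v
  }

  ; Form 2: the equivalent ptrtoint(gep null) constant expression, scaling = 4.
  define i32 @vscale_x4_constexpr() {
    ret i32 ptrtoint (<vscale x 1 x i8>* getelementptr (<vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 4) to i32)
  }

  declare i32 @llvm.vscale(i32)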
Index: llvm/include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- llvm/include/llvm/Target/TargetSelectionDAG.td
+++ llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -316,6 +316,7 @@
 def bb         : SDNode<"ISD::BasicBlock", SDTOther   , [], "BasicBlockSDNode">;
 def cond       : SDNode<"ISD::CONDCODE"  , SDTOther   , [], "CondCodeSDNode">;
 def undef      : SDNode<"ISD::UNDEF"     , SDTUNDEF   , []>;
+def vscale     : SDNode<"ISD::VSCALE"    , SDTIntUnaryOp, []>;
 def globaladdr : SDNode<"ISD::GlobalAddress",         SDTPtrLeaf, [],
                         "GlobalAddressSDNode">;
 def tglobaladdr : SDNode<"ISD::TargetGlobalAddress",  SDTPtrLeaf, [],
Index: llvm/lib/Analysis/ConstantFolding.cpp
===================================================================
--- llvm/lib/Analysis/ConstantFolding.cpp
+++ llvm/lib/Analysis/ConstantFolding.cpp
@@ -828,7 +828,8 @@
   Type *SrcElemTy = GEP->getSourceElementType();
   Type *ResElemTy = GEP->getResultElementType();
   Type *ResTy = GEP->getType();
-  if (!SrcElemTy->isSized())
+  if (!SrcElemTy->isSized() ||
+      (SrcElemTy->isVectorTy() && SrcElemTy->getVectorIsScalable()))
     return nullptr;
 
   if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -1395,7 +1395,8 @@
       } else {
         // Handle array index arithmetic.
         Type *IndexedTy = GTI.getIndexedType();
-        if (!IndexedTy->isSized()) {
+        if (!IndexedTy->isSized() ||
+            (IndexedTy->isVectorTy() && IndexedTy->getVectorIsScalable())) {
           TrailZ = 0;
           break;
         }
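A short illustration of why these guards are needed (example is mine, not part
of the patch): the byte offset produced by indexing over a scalable vector
type is a multiple of vscale, so neither constant folding nor known-bits
reasoning can treat it as a fixed quantity.

  ; The offset of index 1 is sizeof(<vscale x 4 x i32>) = 16 * vscale bytes,
  ; which has no compile-time value, so the gep must not be folded into a
  ; constant byte offset.
  define <vscale x 4 x i32>* @second_chunk(<vscale x 4 x i32>* %base) {
    %p = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 1
    ret <vscale x 4 x i32>* %p
  }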
Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -2010,6 +2010,21 @@
       return despeculateCountZeros(II, TLI, DL, ModifiedDT);
     case Intrinsic::dbg_value:
       return fixupDbgValue(II);
+    case Intrinsic::vscale: {
+      // If the datalayout has no special restrictions on vector data layout,
+      // replace `llvm.vscale` by an equivalent constant expression
+      // to benefit from cheap constant propagation.
+      Type *ScalableVectorTy =
+          VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
+      if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinSize() == 8) {
+        auto Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
+        auto *CGep = ConstantExpr::getGetElementPtr(ScalableVectorTy, Null,
+                                                    II->getOperand(0));
+        II->replaceAllUsesWith(ConstantExpr::getPtrToInt(CGep, II->getType()));
+        II->eraseFromParent();
+        return true;
+      }
+    }
     }
 
     if (TLI) {
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -91,6 +91,7 @@
   case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
   case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
   case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+  case ISD::VSCALE:      Res = PromoteIntRes_VSCALE(N); break;
 
   case ISD::EXTRACT_SUBVECTOR:
                          Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
@@ -1113,6 +1114,13 @@
                                            N->getValueType(0)));
 }
 
+SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) {
+  EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+  int64_t MulImm = cast<ConstantSDNode>(N->getOperand(0))->getSExtValue();
+  return DAG.getVScale(SDLoc(N), VT, MulImm);
+}
+
 SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
   SDValue Chain = N->getOperand(0); // Get the chain.
   SDValue Ptr = N->getOperand(1);   // Get the pointer.
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -326,6 +326,7 @@
   SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_UNDEF(SDNode *N);
   SDValue PromoteIntRes_VAARG(SDNode *N);
+  SDValue PromoteIntRes_VSCALE(SDNode *N);
   SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
   SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
   SDValue PromoteIntRes_MULFIX(SDNode *N);
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5130,11 +5130,19 @@
     if (N2C && N2C->isNullValue())
       return N1;
     break;
+  case ISD::MUL:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    if (N2C && (N1.getOpcode() == ISD::VSCALE)) {
+      int64_t MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getSExtValue();
+      return getVScale(DL, VT, MulImm * N2C->getSExtValue());
+    }
+    break;
   case ISD::UDIV:
   case ISD::UREM:
   case ISD::MULHU:
   case ISD::MULHS:
-  case ISD::MUL:
   case ISD::SDIV:
   case ISD::SREM:
   case ISD::SMIN:
@@ -5167,6 +5175,11 @@
            "Invalid FCOPYSIGN!");
     break;
   case ISD::SHL:
+    if (N2C && (N1.getOpcode() == ISD::VSCALE)) {
+      int64_t MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getSExtValue();
+      return getVScale(DL, VT, MulImm << N2C->getSExtValue());
+    }
+    LLVM_FALLTHROUGH;
   case ISD::SRA:
   case ISD::SRL:
     if (SDValue V = simplifyShift(N1, N2))
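Put differently, a multiply or left shift of a VSCALE node by a constant is
folded straight into the node's immediate as the DAG is built: (mul (vscale 1),
16) becomes (vscale 16), and (shl (vscale 1), 4) likewise becomes (vscale 16).
This is what allows the `mul i32 %vscale, 16` idiom used in the tests at the
end of this patch to be selected as a single rdvl instruction.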
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1468,6 +1468,10 @@
                            TLI.getPointerTy(DAG.getDataLayout(), AS));
   }
 
+  int Scaling;
+  if (match(C, m_VScale(DAG.getDataLayout(), Scaling)))
+    return DAG.getVScale(getCurSDLoc(), VT, Scaling);
+
   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
     return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
 
@@ -5732,6 +5736,12 @@
     // By default, turn this into a target intrinsic node.
     visitTargetIntrinsic(I, Intrinsic);
     return;
+  case Intrinsic::vscale: {
+    int Scaling;
+    match(&I, m_VScale(DAG.getDataLayout(), Scaling));
+    setValue(&I, DAG.getVScale(getCurSDLoc(), MVT::i32, Scaling));
+    return;
+  }
   case Intrinsic::vastart:  visitVAStart(I); return;
   case Intrinsic::vaend:    visitVAEnd(I); return;
   case Intrinsic::vacopy:   visitVACopy(I); return;
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -170,6 +170,7 @@
   case ISD::CopyToReg:                  return "CopyToReg";
   case ISD::CopyFromReg:                return "CopyFromReg";
   case ISD::UNDEF:                      return "undef";
+  case ISD::VSCALE:                     return "vscale";
   case ISD::MERGE_VALUES:               return "merge_values";
   case ISD::INLINEASM:                  return "inlineasm";
   case ISD::INLINEASM_BR:               return "inlineasm_br";
Index: llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -62,6 +62,9 @@
                                     unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  template <signed Low, signed High, signed Scale>
+  bool SelectRDVLImm(SDValue N, SDValue &Imm);
+
   bool tryMLAV64LaneV128(SDNode *N);
   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
@@ -675,6 +678,23 @@
   return SDValue(Node, 0);
 }
 
+// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
+template <signed Low, signed High, signed Scale>
+bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
+  if (!isa<ConstantSDNode>(N))
+    return false;
+
+  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
+  if ((MulImm % std::abs(Scale)) == 0) {
+    int64_t RDVLImm = MulImm / Scale;
+    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
+      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
+      return true;
+    }
+  }
+
+  return false;
+}
 /// SelectArithExtendedRegister - Select a "extended register" operand.  This
 /// operand folds in an extend followed by an optional left shift.
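Worked example for the selection helper above (reasoning only, not part of the
patch): instantiated for CNTH as SelectRDVLImm<1, 16, 8>, a multiplier of 120
gives 120 % 8 == 0 and 120 / 8 == 15, which lies within [1, 16], so the node
is selected with an immediate of 15 (cnth x0, all, mul #15). A multiplier of 3
is rejected by every instantiation and falls back to the generic
RDVL-plus-multiply expansion, as exercised by the rdvl_3 test below.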
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -743,6 +743,7 @@
   SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -821,6 +821,9 @@
     }
   }
 
+  if (Subtarget->hasSVE())
+    setOperationAction(ISD::VSCALE, MVT::i32, Custom);
+
   setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
 }
 
@@ -3213,6 +3216,8 @@
     return LowerATOMIC_LOAD_AND(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::VSCALE:
+    return LowerVSCALE(Op, DAG);
   }
 }
 
@@ -8522,6 +8527,16 @@
   return DAG.getMergeValues(Ops, dl);
 }
 
+SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
+                                           SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  assert(VT != MVT::i64 && "Expected illegal VSCALE node");
+
+  SDLoc DL(Op);
+  int64_t MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getSExtValue();
+  return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm), DL, VT);
+}
+
 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
 /// specified in the intrinsic calls.
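Background for the SVE patterns that follow (not from the patch itself): on
AArch64, rdvl xD, #imm materializes imm * 16 * vscale with imm in [-32, 31],
while cnth/cntw/cntd xD, all, mul #imm materialize imm * 8 * vscale,
imm * 4 * vscale and imm * 2 * vscale respectively with imm in [1, 16]; the
*_neg ComplexPatterns cover the negated multiples by subtracting the count
from xzr. These scales and ranges are exactly the template arguments given to
SelectRDVLImm below.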
Index: llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -46,6 +46,17 @@
 def AArch64ld1_gather_sxtw_scaled  : SDNode<"AArch64ISD::GLD1_SXTW_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1_gather_imm          : SDNode<"AArch64ISD::GLD1_IMM", SDT_AArch64_GLD1_IMM, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 
+// SVE CNT/INC/RDVL
+def sve_rdvl_imm : ComplexPattern<i32, 1, "SelectRDVLImm<-32, 31, 16>">;
+def sve_cnth_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 8>">;
+def sve_cntw_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 4>">;
+def sve_cntd_imm : ComplexPattern<i32, 1, "SelectRDVLImm<1, 16, 2>">;
+
+// SVE DEC
+def sve_cnth_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -8>">;
+def sve_cntw_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -4>">;
+def sve_cntd_imm_neg : ComplexPattern<i32, 1, "SelectRDVLImm<-16, -1, -2>">;
+
 def AArch64ld1s_gather             : SDNode<"AArch64ISD::GLD1S", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1s_gather_scaled      : SDNode<"AArch64ISD::GLD1S_SCALED", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
 def AArch64ld1s_gather_uxtw        : SDNode<"AArch64ISD::GLD1S_UXTW", SDT_AArch64_GLD1, [SDNPHasChain, SDNPMayLoad, SDNPOptInGlue]>;
@@ -1093,6 +1104,20 @@
   def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8),  (SXTB_ZPmZ_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
   def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8),  (SXTB_ZPmZ_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
 
+  // General case that we ideally never want to match.
+  def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
+
+  let AddedComplexity = 5 in {
+    def : Pat<(vscale (sve_rdvl_imm i32:$imm)), (RDVLI_XI $imm)>;
+    def : Pat<(vscale (sve_cnth_imm i32:$imm)), (CNTH_XPiI 31, $imm)>;
+    def : Pat<(vscale (sve_cntw_imm i32:$imm)), (CNTW_XPiI 31, $imm)>;
+    def : Pat<(vscale (sve_cntd_imm i32:$imm)), (CNTD_XPiI 31, $imm)>;
+
+    def : Pat<(vscale (sve_cnth_imm_neg i32:$imm)), (SUBXrs XZR, (CNTH_XPiI 31, $imm), 0)>;
+    def : Pat<(vscale (sve_cntw_imm_neg i32:$imm)), (SUBXrs XZR, (CNTW_XPiI 31, $imm), 0)>;
+    def : Pat<(vscale (sve_cntd_imm_neg i32:$imm)), (SUBXrs XZR, (CNTD_XPiI 31, $imm), 0)>;
+  }
+
   def : Pat<(nxv16i8 (bitconvert (nxv8i16 ZPR:$src))), (nxv16i8 ZPR:$src)>;
   def : Pat<(nxv16i8 (bitconvert (nxv4i32 ZPR:$src))), (nxv16i8 ZPR:$src)>;
   def : Pat<(nxv16i8 (bitconvert (nxv2i64 ZPR:$src))), (nxv16i8 ZPR:$src)>;
Index: llvm/test/CodeGen/AArch64/sve-vscale.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sve-vscale.ll
@@ -0,0 +1,159 @@
+; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
+; RUN: opt -codegenprepare -S < %s | llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 | FileCheck %s
+
+;
+; RDVL
+;
+
+; CHECK-LABEL: rdvl:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 16
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_arg:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl_arg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 16)
+  ret i32 %vscale
+}
+
+; CHECK-LABEL: rdvl_const:
+; CHECK:       rdvl x0, #1
+; CHECK-NEXT:  ret
+define i32 @rdvl_const() nounwind {
+  ret i32 ptrtoint (<vscale x 1 x i8>* getelementptr (<vscale x 1 x i8>, <vscale x 1 x i8>* null, i64 16) to i32)
+}
+
+; CHECK-LABEL: rdvl_3:
+; CHECK:       rdvl [[VL_B:x[0-9]+]], #1
+; CHECK-NEXT:  lsr  [[VL_Q:x[0-9]+]], [[VL_B]], #4
+; CHECK-NEXT:  mov  w[[MUL:[0-9]+]], #3
+; CHECK-NEXT:  mul  x0, [[VL_Q]], x[[MUL]]
+; CHECK-NEXT:  ret
+define i32 @rdvl_3() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 3
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_min:
+; CHECK:       rdvl x0, #-32
+; CHECK-NEXT:  ret
+define i32 @rdvl_min() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -512
+  ret i32 %1
+}
+
+; CHECK-LABEL: rdvl_max:
+; CHECK:       rdvl x0, #31
+; CHECK-NEXT:  ret
+define i32 @rdvl_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 496
+  ret i32 %1
+}
+
+;
+; CNTH
+;
+
+; CHECK-LABEL: cnth:
+; CHECK:       cnth x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cnth() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 8
+  ret i32 %1
+}
+
+; CHECK-LABEL: cnth_max:
+; CHECK:       cnth x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cnth_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 120
+  ret i32 %1
+}
+
+; CHECK-LABEL: cnth_neg:
+; CHECK:       cnth [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cnth_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -8
+  ret i32 %1
+}
+
+;
+; CNTW
+;
+
+; CHECK-LABEL: cntw:
+; CHECK:       cntw x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cntw() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 4
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntw_max:
+; CHECK:       cntw x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cntw_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 60
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntw_neg:
+; CHECK:       cntw [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cntw_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -4
+  ret i32 %1
+}
+
+;
+; CNTD
+;
+
+; CHECK-LABEL: cntd:
+; CHECK:       cntd x0{{$}}
+; CHECK-NEXT:  ret
+define i32 @cntd() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 2
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntd_max:
+; CHECK:       cntd x0, all, mul #15
+; CHECK-NEXT:  ret
+define i32 @cntd_max() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, 30
+  ret i32 %1
+}
+
+; CHECK-LABEL: cntd_neg:
+; CHECK:       cntd [[CNT:x[0-9]+]]
+; CHECK:       neg x0, [[CNT]]
+; CHECK-NEXT:  ret
+define i32 @cntd_neg() nounwind {
+  %vscale = call i32 @llvm.vscale(i32 1)
+  %1 = mul i32 %vscale, -2
+  ret i32 %1
+}
+
+declare i32 @llvm.vscale(i32)