diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -459,7 +459,9 @@
     return true;
   }
 
-  virtual bool shouldExpandGetVectorLength(unsigned VF) const { return true; }
+  virtual bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF) const {
+    return true;
+  }
 
   // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
   // vecreduce(op(x, y)) for the reduction opcode RedOpc.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7300,7 +7300,9 @@
            "Expected positive VF");
     unsigned VF = cast<ConstantInt>(I.getOperand(2))->getZExtValue();
 
-    if (!TLI.shouldExpandGetVectorLength(VF)) {
+    SDValue TripCount = getValue(I.getOperand(0));
+
+    if (!TLI.shouldExpandGetVectorLength(TripCount.getValueType(), VF)) {
       visitTargetIntrinsic(I, Intrinsic);
       return;
     }
@@ -7308,7 +7310,6 @@
     // Expand to a umin between the trip count and the maximum elements the type
     // can hold.
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
-    SDValue TripCount = getValue(I.getOperand(0));
 
     // Extend the trip count to at least the result VT.
     if (TripCount.getValueType().bitsLT(VT))
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -854,6 +854,8 @@
 
   MVT getVPExplicitVectorLengthTy() const override;
 
+  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF) const override;
+
   /// RVV code generation for fixed length vectors does not lower all
   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
   /// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1174,6 +1174,27 @@
   return Subtarget.getXLenVT();
 }
 
+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+                                                      unsigned VF) const {
+  if (!Subtarget.hasVInstructions())
+    return true;
+
+  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+    return true;
+
+  // Don't allow VF=1 if those types aren't legal.
+  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+    return true;
+
+  // VLEN=32 support is incomplete.
+  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+    return true;
+
+  // We can only use vsetvli for VFs that are powers of 2 in the range [1, 64].
+  return VF > 64 || !isPowerOf2_32(VF);
+}
+
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              MachineFunction &MF,
@@ -6487,6 +6508,39 @@
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
 }
 
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  unsigned ElementWidth = N->getConstantOperandVal(2);
+  // Sanitize the ElementWidth.
+  if (ElementWidth < 8 || ElementWidth > Subtarget.getELEN() ||
+      !isPowerOf2_64(ElementWidth))
+    ElementWidth = 8;
+  unsigned VF = N->getConstantOperandVal(3);
+  assert(VF >= 1 && VF <= 64 && isPowerOf2_32(VF) && "Unexpected VF");
+  // We want to use an SEW and LMUL that match the ElementWidth if possible.
+  unsigned Factor = ElementWidth / 8;
+  if (VF * Factor <= 64)
+    VF = VF * Factor;
+  else
+    ElementWidth = 8;
+
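+  // Map to the vtype encoding from the V spec: integer LMUL m1/m2/m4/m8
+  // encodes as 0-3, fractional mf8/mf4/mf2 as 5-7, and SEW w encodes as
+  // log2(w) - 3. VF is in e8 element units at this point, so LMUL = VF / 8.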
+  unsigned LMulExp = Log2_32(VF);
+  unsigned LMulVal = LMulExp >= 3 ? LMulExp - 3 : LMulExp + 5;
+  unsigned SewVal = Log2_32(ElementWidth) - 3;
+
+  SDLoc DL(N);
+
+  SDValue LMul = DAG.getTargetConstant(LMulVal, DL, XLenVT);
+  SDValue Sew = DAG.getTargetConstant(SewVal, DL, XLenVT);
+
+  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+
+  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+}
+
 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(0);
@@ -6512,6 +6566,8 @@
         IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
+  case Intrinsic::experimental_get_vector_length:
+    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
   case Intrinsic::riscv_vmv_x_s:
     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -9326,6 +9382,11 @@
     default:
       llvm_unreachable(
           "Don't know how to custom type legalize this intrinsic!");
+    case Intrinsic::experimental_get_vector_length: {
+      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
     case Intrinsic::riscv_orc_b: {
       SDValue NewOp =
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i32)
 declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i32)
@@ -21,51 +21,177 @@
 }
 
 define i32 @vector_length_i32(i32 zeroext %tc) {
-; RV32-LABEL: vector_length_i32:
+; CHECK-LABEL: vector_length_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2)
+  ret i32 %a
+}
+
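+;; vsetvli computes min(AVL, VLMAX), so the lowering picks SEW and LMUL such
+;; that VLMAX = VF * vscale, i.e. LMUL = VF * SEW / 64. For SEW=8 and VF=2
+;; that is LMUL=1/4, hence the "e8, mf4" above. The tests below walk the rest
+;; of the SEW/LMUL mapping.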
+define i32 @vector_length_e8vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 64)
+  ret i32 %a
+}
+
+define i32 @vector_length_e8vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 64)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 32)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 32)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 64)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 64)
+  ret i32 %a
+}
+
+;; Element width ignored because it is below 8.
+define i32 @vector_length_e7vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e7vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 7, i32 2)
+  ret i32 %a
+}
+
+;; Element width ignored because it is larger than ELEN.
+define i32 @vector_length_e128vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e128vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 128, i32 2)
+  ret i32 %a
+}
+
+;; Element width ignored because it is not a power of 2.
+define i32 @vector_length_e9vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e9vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 2)
+  ret i32 %a
+}
+
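+;; VF=128 is larger than any single vsetvli can cover (e8 with m8 reaches
+;; only VF=64), and VF=3 is not a power of 2, so the two tests below fall
+;; back to the generic umin(%tc, VF * vscale) expansion.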
+define i32 @vector_length_e8vf128_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf128_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB1_2
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    bltu a0, a1, .LBB14_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB1_2:
+; RV32-NEXT:  .LBB14_2:
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vector_length_i32:
+; RV64-LABEL: vector_length_e8vf128_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB1_2
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    bltu a0, a1, .LBB14_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB1_2:
+; RV64-NEXT:  .LBB14_2:
 ; RV64-NEXT:    ret
-  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 2)
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 128)
   ret i32 %a
 }
 
-define i32 @vector_length_XLen(iXLen zeroext %tc) {
-; RV32-LABEL: vector_length_XLen:
+define i32 @vector_length_e8vf3_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf3_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB2_2
+; RV32-NEXT:    srli a1, a1, 3
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    bltu a0, a1, .LBB15_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB2_2:
+; RV32-NEXT:  .LBB15_2:
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vector_length_XLen:
+; RV64-LABEL: vector_length_e8vf3_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB2_2
+; RV64-NEXT:    srli a1, a1, 3
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    bltu a0, a1, .LBB15_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB2_2:
+; RV64-NEXT:  .LBB15_2:
 ; RV64-NEXT:    ret
-  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2)
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 3)
   ret i32 %a
 }