diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -854,6 +854,9 @@
   MVT getVPExplicitVectorLengthTy() const override;
 
+  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
+                                   bool IsScalable) const override;
+
   /// RVV code generation for fixed length vectors does not lower all
   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
   /// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1174,6 +1174,31 @@
   return Subtarget.getXLenVT();
 }
 
+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+                                                      unsigned VF,
+                                                      bool IsScalable) const {
+  if (!Subtarget.hasVInstructions())
+    return true;
+
+  if (!IsScalable)
+    return true;
+
+  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+    return true;
+
+  // Don't allow VF=1 if those types aren't legal.
+  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+    return true;
+
+  // VLEN=32 support is incomplete.
+  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+    return true;
+
+  // We can only use vsetvli for powers of 2 in the range [1, 64].
+  return VF > 64 || !isPowerOf2_32(VF);
+}
+
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              MachineFunction &MF,
@@ -6487,6 +6512,39 @@
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
 }
 
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  unsigned ElementWidth = N->getConstantOperandVal(2);
+  // Sanitize the ElementWidth.
+  if (ElementWidth < 8 || ElementWidth > Subtarget.getELEN() ||
+      !isPowerOf2_64(ElementWidth))
+    ElementWidth = 8;
+  unsigned VF = N->getConstantOperandVal(3);
+  assert(VF >= 1 && VF <= 64 && isPowerOf2_32(VF) && "Unexpected VF");
+  // We want to use an SEW and LMUL that matches the ElementWidth if possible.
+  unsigned Factor = ElementWidth / 8;
+  if (VF * Factor <= 64)
+    VF = VF * Factor;
+  else
+    ElementWidth = 8;
+
+  unsigned LMulExp = Log2_32(VF);
+  unsigned LMulVal = LMulExp >= 3 ? LMulExp - 3 : LMulExp + 5;
+  unsigned SewVal = Log2_32(ElementWidth) - 3;
+
+  SDLoc DL(N);
+
+  SDValue LMul = DAG.getTargetConstant(LMulVal, DL, XLenVT);
+  SDValue Sew = DAG.getTargetConstant(SewVal, DL, XLenVT);
+
+  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+
+  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+}
+
 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(0);
@@ -6512,6 +6570,8 @@
         IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
+  case Intrinsic::experimental_get_vector_length:
+    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
   case Intrinsic::riscv_vmv_x_s:
     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -9326,6 +9386,11 @@
     default:
       llvm_unreachable(
           "Don't know how to custom type legalize this intrinsic!");
+    case Intrinsic::experimental_get_vector_length: {
+      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
     case Intrinsic::riscv_orc_b: {
       SDValue NewOp =
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i32, i1)
 declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i32, i1)
@@ -21,51 +21,19 @@
 }
 
 define i32 @vector_length_i32(i32 zeroext %tc) {
-; RV32-LABEL: vector_length_i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB1_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB1_2:
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vector_length_i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    sext.w a0, a0
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB1_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB1_2:
-; RV64-NEXT:    ret
+; CHECK-LABEL: vector_length_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
   %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 2, i1 true)
   ret i32 %a
 }
 
 define i32 @vector_length_XLen(iXLen zeroext %tc) {
-; RV32-LABEL: vector_length_XLen:
-; RV32:       # %bb.0:
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB2_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB2_2:
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vector_length_XLen:
-; RV64:       # %bb.0:
-; RV64-NEXT:    sext.w a0, a0
-; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB2_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB2_2:
-; RV64-NEXT:    ret
+; CHECK-LABEL: vector_length_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
   %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2, i1 true)
   ret i32 %a
 }
@@ -128,3 +96,161 @@
   %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2, i1 false)
   ret i32 %a
 }
+
+define i32 @vector_length_e8vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 64, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e8vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 64, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 2, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 2, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 32, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 32, i1 true)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 64, i1 true)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 64, i1 true)
+  ret i32 %a
+}
+
+;; Element width ignored
+define i32 @vector_length_e7vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e7vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 7, i32 2, i1 true)
+  ret i32 %a
+}
+
+;; Element width ignored
+define i32 @vector_length_e128vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e128vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 128, i32 2, i1 true)
+  ret i32 %a
+}
+
+;; Element width ignored
+define i32 @vector_length_e9vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e9vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 2, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e8vf128_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf128_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    bltu a0, a1, .LBB17_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB17_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vector_length_e8vf128_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    bltu a0, a1, .LBB17_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB17_2:
+; RV64-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 128, i1 true)
+  ret i32 %a
+}
+
+define i32 @vector_length_e8vf3_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf3_i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a1, a1, 3
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    bltu a0, a1, .LBB18_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB18_2:
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vector_length_e8vf3_i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    sext.w a0, a0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a1, a1, 3
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    bltu a0, a1, .LBB18_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB18_2:
+; RV64-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 3, i1 true)
+  ret i32 %a
+}