diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -870,6 +870,9 @@
   MVT getVPExplicitVectorLengthTy() const override;

+  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
+                                   bool IsScalable) const override;
+
   /// RVV code generation for fixed length vectors does not lower all
   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
   /// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1175,6 +1175,33 @@
   return Subtarget.getXLenVT();
 }

+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+                                                      unsigned VF,
+                                                      bool IsScalable) const {
+  if (!Subtarget.hasVInstructions())
+    return true;
+
+  if (!IsScalable)
+    return true;
+
+  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+    return true;
+
+  // Don't allow VF=1 if those types aren't legal.
+  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+    return true;
+
+  // VLEN=32 support is incomplete.
+  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+    return true;
+
+  // The maximum VF is for the smallest element width with LMUL=8.
+  // VF must be a power of 2.
+  unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
+  return VF > MaxVF || !isPowerOf2_32(VF);
+}
+
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              MachineFunction &MF,
@@ -6623,6 +6650,48 @@
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
 }

+// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
+// scalable vector llvm.get.vector.length for now.
+//
+// We need to convert from a scalable VF to a vsetvli with VLMax equal to
+// (vscale * VF). The vscale and VF are independent of element width. We use
+// SEW=8 for the vsetvli because it is the only element width that supports all
+// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
+// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
+// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
+// SEW and LMUL are better for the surrounding vector instructions.
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // The smallest LMUL is only valid for the smallest element width.
+  const unsigned ElementWidth = 8;
+
+  // Determine the VF that corresponds to LMUL 1 for ElementWidth.
+  unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
+  // We don't support VF==1 with ELEN==32.
+  unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+
+  unsigned VF = N->getConstantOperandVal(2);
+  assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
+         "Unexpected VF");
+
+  bool Fractional = VF < LMul1VF;
+  unsigned LMulVal = Fractional ? 
LMul1VF / VF : VF / LMul1VF; + unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional); + unsigned VSEW = RISCVVType::encodeSEW(ElementWidth); + + SDLoc DL(N); + + SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT); + SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT); + + SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1)); + + SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT); + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul); +} + SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const { unsigned IntNo = Op.getConstantOperandVal(0); @@ -6648,6 +6717,8 @@ IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP; return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } + case Intrinsic::experimental_get_vector_length: + return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); case Intrinsic::riscv_vmv_x_s: assert(Op.getValueType() == XLenVT && "Unexpected VT!"); return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(), @@ -9471,6 +9542,11 @@ default: llvm_unreachable( "Don't know how to custom type legalize this intrinsic!"); + case Intrinsic::experimental_get_vector_length: { + SDValue Res = lowerGetVectorLength(N, DAG, Subtarget); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + return; + } case Intrinsic::riscv_orc_b: { SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll --- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i1) declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i1) @@ -21,51 +21,19 @@ } define i32 @vector_length_i32(i32 zeroext %tc) { -; RV32-LABEL: vector_length_i32: -; RV32: # %bb.0: -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a1, a1, 2 -; RV32-NEXT: bltu a0, a1, .LBB1_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB1_2: -; RV32-NEXT: ret -; -; RV64-LABEL: vector_length_i32: -; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: bltu a0, a1, .LBB1_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB1_2: -; RV64-NEXT: ret +; CHECK-LABEL: vector_length_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma +; CHECK-NEXT: ret %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true) ret i32 %a } define i32 @vector_length_XLen(iXLen zeroext %tc) { -; RV32-LABEL: vector_length_XLen: -; RV32: # %bb.0: -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: srli a1, a1, 2 -; RV32-NEXT: bltu a0, a1, .LBB2_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB2_2: -; RV32-NEXT: ret -; -; 
RV64-LABEL: vector_length_XLen: -; RV64: # %bb.0: -; RV64-NEXT: sext.w a0, a0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: srli a1, a1, 2 -; RV64-NEXT: bltu a0, a1, .LBB2_2 -; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: .LBB2_2: -; RV64-NEXT: ret +; CHECK-LABEL: vector_length_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma +; CHECK-NEXT: ret %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 true) ret i32 %a } @@ -128,3 +96,237 @@ %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 false) ret i32 %a } + +define i32 @vector_length_vf1_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf1_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf8, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 1, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf1_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf1_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf8, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 1, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf2_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf2_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 2, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf2_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf2_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf4, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 2, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf4_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf4_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf2, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 4, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf4_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf4_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, mf2, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 4, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf8_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf8_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf8_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf8_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m1, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf16_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf16_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m2, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf16_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf16_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m2, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf32_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetvli a0, a0, e8, m4, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 32, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf32_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf32_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m4, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 32, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf64_i32(i32 zeroext %tc) { +; CHECK-LABEL: vector_length_vf64_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m8, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 64, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf64_XLen(iXLen zeroext %tc) { +; CHECK-LABEL: vector_length_vf64_XLen: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, a0, e8, m8, ta, ma +; CHECK-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 64, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf128_i32(i32 zeroext %tc) { +; RV32-LABEL: vector_length_vf128_i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: bltu a0, a1, .LBB20_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB20_2: +; RV32-NEXT: ret +; +; RV64-LABEL: vector_length_vf128_i32: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: bltu a0, a1, .LBB20_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB20_2: +; RV64-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 128, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf128_XLen(iXLen zeroext %tc) { +; RV32-LABEL: vector_length_vf128_XLen: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: bltu a0, a1, .LBB21_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB21_2: +; RV32-NEXT: ret +; +; RV64-LABEL: vector_length_vf128_XLen: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: bltu a0, a1, .LBB21_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB21_2: +; RV64-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 128, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf3_i32(i32 zeroext %tc) { +; RV32-LABEL: vector_length_vf3_i32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: bltu a0, a1, .LBB22_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB22_2: +; RV32-NEXT: ret +; +; RV64-LABEL: vector_length_vf3_i32: +; RV64: # %bb.0: +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: bltu a0, a1, .LBB22_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB22_2: +; RV64-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 3, i1 true) + ret i32 %a +} + +define i32 @vector_length_vf3_XLen(iXLen zeroext %tc) { +; RV32-LABEL: vector_length_vf3_XLen: +; RV32: # %bb.0: +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: srli a1, a1, 3 +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: bltu a0, a1, .LBB23_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a0, a1 +; RV32-NEXT: .LBB23_2: +; RV32-NEXT: ret +; +; RV64-LABEL: vector_length_vf3_XLen: +; RV64: # %bb.0: +; 
RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: srli a1, a1, 3 +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: bltu a0, a1, .LBB23_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: mv a0, a1 +; RV64-NEXT: .LBB23_2: +; RV64-NEXT: ret + %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 3, i1 true) + ret i32 %a +}
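
For reference, here is a minimal standalone sketch (not part of the patch; the file name, main() driver, and printed text are illustrative only) of the VF-to-vtype mapping that lowerGetVectorLength performs, assuming RVVBitsPerBlock == 64 and SEW fixed at 8:

// vf_to_lmul_sketch.cpp -- illustrative only; mirrors lowerGetVectorLength's
// arithmetic: pick an LMUL at SEW=8 so that VLMax == vscale * VF, where
// vscale = VLEN / RVVBitsPerBlock. Assumes RVVBitsPerBlock == 64.
#include <cstdio>

int main() {
  const unsigned RVVBitsPerBlock = 64;
  const unsigned ElementWidth = 8;                         // SEW=8 supports every fractional LMUL
  const unsigned LMul1VF = RVVBitsPerBlock / ElementWidth; // VF that corresponds to LMUL=1 (here 8)

  // Powers of 2 from VF=1 (mf8) up to the maximum VF=64 (m8); larger or
  // non-power-of-2 VFs are rejected by shouldExpandGetVectorLength.
  for (unsigned VF = 1; VF <= LMul1VF * 8; VF *= 2) {
    bool Fractional = VF < LMul1VF;
    unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
    std::printf("VF=%-2u -> vsetvli a0, a0, e8, %s%u, ta, ma\n", VF,
                Fractional ? "mf" : "m", LMulVal);
  }
  return 0;
}

Under these assumptions the printed vtypes line up with the CHECK lines in get_vector_length.ll above (mf8 for VF=1 through m8 for VF=64). VF=128 exceeds MaxVF and VF=3 is not a power of 2, so shouldExpandGetVectorLength keeps the generic csrr/branch expansion for those cases, as the vf128 and vf3 tests show.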