diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -459,7 +459,9 @@
     return true;
   }
 
-  virtual bool shouldExpandGetVectorLength(unsigned VF) const { return true; }
+  virtual bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF) const {
+    return true;
+  }
 
   // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
   // vecreduce(op(x, y)) for the reduction opcode RedOpc.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7300,7 +7300,9 @@
            "Expected positive VF");
     unsigned VF = cast<ConstantInt>(I.getOperand(2))->getZExtValue();
 
-    if (!TLI.shouldExpandGetVectorLength(VF)) {
+    SDValue TripCount = getValue(I.getOperand(0));
+
+    if (!TLI.shouldExpandGetVectorLength(TripCount.getValueType(), VF)) {
       visitTargetIntrinsic(I, Intrinsic);
       return;
     }
@@ -7308,7 +7310,6 @@
     // Expand to a umin between the trip count and the maximum elements the type
     // can hold.
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
-    SDValue TripCount = getValue(I.getOperand(0));
 
     // Extend the trip count to at least the result VT.
     if (TripCount.getValueType().bitsLT(VT))
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -854,6 +854,8 @@
 
   MVT getVPExplicitVectorLengthTy() const override;
 
+  bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF) const override;
+
   /// RVV code generation for fixed length vectors does not lower all
   /// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
   /// merge. However, merging them creates a BUILD_VECTOR that is just as
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1174,6 +1174,27 @@
   return Subtarget.getXLenVT();
 }
 
+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+                                                      unsigned VF) const {
+  if (!Subtarget.hasVInstructions())
+    return true;
+
+  if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+    return true;
+
+  // Don't allow VF=1 if those types aren't legal.
+  if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+    return true;
+
+  // VLEN=32 support is incomplete.
+  if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+    return true;
+
+  // We can only use vsetvli for VFs that are powers of 2 in the range [1, 64].
+  return VF > 64 || !isPowerOf2_32(VF);
+}
+
 bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              MachineFunction &MF,
@@ -6487,6 +6508,39 @@
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
 }
 
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  unsigned ElementWidth = N->getConstantOperandVal(2);
+  // Sanitize the ElementWidth.
+  if (ElementWidth < 8 || ElementWidth > Subtarget.getELEN() ||
+      !isPowerOf2_64(ElementWidth))
+    ElementWidth = 8;
+  unsigned VF = N->getConstantOperandVal(3);
+  assert(VF >= 1 && VF <= 64 && isPowerOf2_32(VF) && "Unexpected VF");
+  // We want to use an SEW and LMUL that match the ElementWidth if possible.
+  unsigned Factor = ElementWidth / 8;
+  if (VF * Factor <= 64)
+    VF = VF * Factor;
+  else
+    ElementWidth = 8;
+
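+  // Map to the vtype encoding from the V spec: integer LMUL m1/m2/m4/m8
+  // encodes as 0-3, fractional mf8/mf4/mf2 as 5-7, and SEW w encodes as
+  // log2(w) - 3. VF is in e8 element units at this point, so LMUL = VF / 8.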
+  unsigned LMulExp = Log2_32(VF);
+  unsigned LMulVal = LMulExp >= 3 ? LMulExp - 3 : LMulExp + 5;
+  unsigned SewVal = Log2_32(ElementWidth) - 3;
+
+  SDLoc DL(N);
+
+  SDValue LMul = DAG.getTargetConstant(LMulVal, DL, XLenVT);
+  SDValue Sew = DAG.getTargetConstant(SewVal, DL, XLenVT);
+
+  SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+
+  SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+}
+
 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(0);
@@ -6512,6 +6566,8 @@
         IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
     return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
   }
+  case Intrinsic::experimental_get_vector_length:
+    return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
   case Intrinsic::riscv_vmv_x_s:
     assert(Op.getValueType() == XLenVT && "Unexpected VT!");
     return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -9326,6 +9382,11 @@
     default:
       llvm_unreachable(
           "Don't know how to custom type legalize this intrinsic!");
+    case Intrinsic::experimental_get_vector_length: {
+      SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
+      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+      return;
+    }
     case Intrinsic::riscv_orc_b: {
       SDValue NewOp =
           DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
diff --git a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
--- a/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/get_vector_length.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i32 @llvm.experimental.get.vector.length.i16(i16, i32, i32)
 declare i32 @llvm.experimental.get.vector.length.i32(i32, i32, i32)
@@ -21,51 +21,177 @@
 }
 
 define i32 @vector_length_i32(i32 zeroext %tc) {
-; RV32-LABEL: vector_length_i32:
+; CHECK-LABEL: vector_length_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2)
+  ret i32 %a
+}
+
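+;; vsetvli computes min(AVL, VLMAX), so the lowering picks SEW and LMUL such
+;; that VLMAX = VF * vscale, i.e. LMUL = VF * SEW / 64. For SEW=8 and VF=2
+;; that is LMUL=1/4, hence the "e8, mf4" above. The tests below walk the rest
+;; of the SEW/LMUL mapping.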
+define i32 @vector_length_e8vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 64)
+  ret i32 %a
+}
+
+define i32 @vector_length_e8vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e8vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 64)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf2_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf2_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf2, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 2)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 32)
+  ret i32 %a
+}
+
+define i32 @vector_length_e16vf32_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf32_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 32)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 16, i32 64)
+  ret i32 %a
+}
+
+;; Element width ignored because VF is too large.
+define i32 @vector_length_e16vf64_XLen(iXLen zeroext %tc) {
+; CHECK-LABEL: vector_length_e16vf64_XLen:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m8, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 16, i32 64)
+  ret i32 %a
+}
+
+;; Element width ignored because it is below 8.
+define i32 @vector_length_e7vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e7vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 7, i32 2)
+  ret i32 %a
+}
+
+;; Element width ignored because it is larger than ELEN.
+define i32 @vector_length_e128vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e128vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 128, i32 2)
+  ret i32 %a
+}
+
+;; Element width ignored because it is not a power of 2.
+define i32 @vector_length_e9vf2_i32(i32 zeroext %tc) {
+; CHECK-LABEL: vector_length_e9vf2_i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, mf4, ta, ma
+; CHECK-NEXT:    ret
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 2)
+  ret i32 %a
+}
+
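+;; VF=128 is larger than any single vsetvli can cover (e8 with m8 reaches
+;; only VF=64), and VF=3 is not a power of 2, so the two tests below fall
+;; back to the generic umin(%tc, VF * vscale) expansion.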
+define i32 @vector_length_e8vf128_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf128_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB1_2
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    bltu a0, a1, .LBB14_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB1_2:
+; RV32-NEXT:  .LBB14_2:
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vector_length_i32:
+; RV64-LABEL: vector_length_e8vf128_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB1_2
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    bltu a0, a1, .LBB14_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB1_2:
+; RV64-NEXT:  .LBB14_2:
 ; RV64-NEXT:    ret
-  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 8, i32 2)
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 128)
   ret i32 %a
 }
 
-define i32 @vector_length_XLen(iXLen zeroext %tc) {
-; RV32-LABEL: vector_length_XLen:
+define i32 @vector_length_e8vf3_i32(i32 zeroext %tc) {
+; RV32-LABEL: vector_length_e8vf3_i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    srli a1, a1, 2
-; RV32-NEXT:    bltu a0, a1, .LBB2_2
+; RV32-NEXT:    srli a1, a1, 3
+; RV32-NEXT:    slli a2, a1, 1
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    bltu a0, a1, .LBB15_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:  .LBB2_2:
+; RV32-NEXT:  .LBB15_2:
 ; RV32-NEXT:    ret
 ;
-; RV64-LABEL: vector_length_XLen:
+; RV64-LABEL: vector_length_e8vf3_i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    sext.w a0, a0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a1, a1, 2
-; RV64-NEXT:    bltu a0, a1, .LBB2_2
+; RV64-NEXT:    srli a1, a1, 3
+; RV64-NEXT:    slli a2, a1, 1
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    bltu a0, a1, .LBB15_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a0, a1
-; RV64-NEXT:  .LBB2_2:
+; RV64-NEXT:  .LBB15_2:
 ; RV64-NEXT:    ret
-  %a = call i32 @llvm.experimental.get.vector.length.iXLen(iXLen %tc, i32 8, i32 2)
+  %a = call i32 @llvm.experimental.get.vector.length.i32(i32 %tc, i32 9, i32 3)
   ret i32 %a
 }