diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,6 +159,22 @@
                                        ImmArg<ArgIndex<0>>,
                                        ImmArg<ArgIndex<1>>]>;
 
+  // Versions without side effects: better optimizable and usable if only the
+  // returned vector length is important.
+  def int_riscv_vsetvli_opt   : Intrinsic<[llvm_anyint_ty],
+                               /* AVL */  [LLVMMatchType<0>,
+                               /* VSEW */  LLVMMatchType<0>,
+                               /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<1>>,
+                                           ImmArg<ArgIndex<2>>]>;
+  def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
+                                /* VSEW */ [LLVMMatchType<0>,
+                                /* VLMUL */ LLVMMatchType<0>],
+                                           [IntrNoMem,
+                                            ImmArg<ArgIndex<0>>,
+                                            ImmArg<ArgIndex<1>>]>;
+
   // For unit stride load
   // Input: (pointer, vl)
   class RISCVUSLoad
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -87,6 +87,8 @@
   void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
   void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
 
+  void selectVSETVLI(SDNode *Node);
+
   // Return the RISC-V condition code that matches the given DAG integer
   // condition code. The CondCode must be one of those supported by the RISC-V
   // ISA (see translateSetCCForBranch).
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -495,6 +495,75 @@
   ReplaceNode(Node, Store);
 }
 
+void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
+  if (!Subtarget->hasVInstructions())
+    return;
+
+  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
+         "Unexpected opcode");
+
+  SDLoc DL(Node);
+  MVT XLenVT = Subtarget->getXLenVT();
+
+  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNoOffset = HasChain ? 1 : 0;
+  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+
+  assert((IntNo == Intrinsic::riscv_vsetvli ||
+          IntNo == Intrinsic::riscv_vsetvlimax ||
+          IntNo == Intrinsic::riscv_vsetvli_opt ||
+          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+         "Unexpected vsetvli intrinsic");
+
+  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
+               IntNo == Intrinsic::riscv_vsetvlimax_opt;
+  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+
+  assert(Node->getNumOperands() == Offset + 2 &&
+         "Unexpected number of operands");
+
+  unsigned SEW =
+      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
+  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
+      Node->getConstantOperandVal(Offset + 1) & 0x7);
+
+  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
+                                            /*MaskAgnostic*/ false);
+  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+  SmallVector<EVT, 2> VTs = {XLenVT};
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  SDValue VLOperand;
+  unsigned Opcode = RISCV::PseudoVSETVLI;
+  if (VLMax) {
+    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+    Opcode = RISCV::PseudoVSETVLIX0;
+  } else {
+    VLOperand = Node->getOperand(IntNoOffset + 1);
+
+    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+      uint64_t AVL = C->getZExtValue();
+      if (isUInt<5>(AVL)) {
+        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
+        if (HasChain)
+          Ops.push_back(Node->getOperand(0));
+        ReplaceNode(
+            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+        return;
+      }
+    }
+  }
+
+  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
+  if (HasChain)
+    Ops.push_back(Node->getOperand(0));
+
+  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+}
 
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
@@ -1017,6 +1086,9 @@
                                                {Cmp, Mask, VL, MaskSEW}));
       return;
     }
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
+      return selectVSETVLI(Node);
     }
     break;
   }
@@ -1026,54 +1098,9 @@
     // By default we do not custom select any intrinsic.
     default:
       break;
-    case Intrinsic::riscv_vsetvli:
-    case Intrinsic::riscv_vsetvlimax: {
-      if (!Subtarget->hasVInstructions())
-        break;
-
-      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
-      unsigned Offset = VLMax ? 2 : 3;
-
-      assert(Node->getNumOperands() == Offset + 2 &&
-             "Unexpected number of operands");
-
-      unsigned SEW =
-          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
-      RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
-          Node->getConstantOperandVal(Offset + 1) & 0x7);
-
-      unsigned VTypeI = RISCVVType::encodeVTYPE(
-          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
-      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
-
-      SDValue VLOperand;
-      unsigned Opcode = RISCV::PseudoVSETVLI;
-      if (VLMax) {
-        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
-        Opcode = RISCV::PseudoVSETVLIX0;
-      } else {
-        VLOperand = Node->getOperand(2);
-
-        if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
-          uint64_t AVL = C->getZExtValue();
-          if (isUInt<5>(AVL)) {
-            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
-            ReplaceNode(
-                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
-                                             MVT::Other, VLImm, VTypeIOp,
-                                             /* Chain */ Node->getOperand(0)));
-            return;
-          }
-        }
-      }
-
-      ReplaceNode(Node,
-                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
-                                         MVT::Other, VLOperand, VTypeIOp,
-                                         /* Chain */ Node->getOperand(0)));
-      return;
-    }
+    case Intrinsic::riscv_vsetvli:
+    case Intrinsic::riscv_vsetvlimax:
+      return selectVSETVLI(Node);
     case Intrinsic::riscv_vlseg2:
     case Intrinsic::riscv_vlseg3:
     case Intrinsic::riscv_vlseg4:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8162,14 +8162,18 @@
     // We assume VLENB is no more than 65536 / 8 bytes.
     Known.Zero.setBitsFrom(14);
     break;
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned IntNo = Op.getConstantOperandVal(1);
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo =
+        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
     switch (IntNo) {
     default:
       // We can't do anything for most intrinsics.
       break;
     case Intrinsic::riscv_vsetvli:
     case Intrinsic::riscv_vsetvlimax:
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
       // Assume that VL output is positive and would fit in an int32_t.
       // TODO: VLEN might be capped at 16 bits in a future V spec update.
       if (BitWidth >= 32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
 declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
+declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
+declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
 
 define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e64mf8:
@@ -31,6 +33,68 @@
   ret void
 }
 
+define i32 @test_vsetvli_opt_e8m1(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret i32 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret void
+}
+
+define i32 @test_vsetvli_opt_e16mf4(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 0, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret i32 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret void
+}
+
+define i32 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 3, i32 2)
+  ret i32 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
+declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
+declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
 
 define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e8m1:
@@ -49,6 +51,68 @@
   ret void
 }
 
+define i64 @test_vsetvli_opt_e8m1(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret i64 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret void
+}
+
+define i64 @test_vsetvli_opt_e16mf4(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 0, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret i64 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret void
+}
+
+define i64 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 3, i64 2)
+  ret i64 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
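
Not part of the diff: a minimal IR-level sketch of how the new side-effect-free intrinsics are meant to be used, assuming the i64 declarations added above. The function name is illustrative; the (2, 1) operands encode e32/m2 exactly as in the tests.

; Illustrative sketch only: query the vector length for e32/m2 without pinning
; vl/vtype state. Because the _opt variant is IntrNoMem, a call whose result is
; unused can be CSE'd or deleted entirely (see the *_nouse tests above).
declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)

define i64 @chunk_size(i64 %avl) {
  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 2, i64 1) ; e32, m2
  ret i64 %vl
}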