Index: lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.h
+++ lib/Target/RISCV/RISCVISelLowering.h
@@ -36,7 +36,12 @@
   // RV64I shifts, directly matching the semantics of the named RISC-V
   // instructions.
   SLLW,
   SRAW,
-  SRLW
+  SRLW,
+  // 32-bit operations from RV64M that can't be simply matched with a pattern
+  // at instruction selection time.
+  DIVW,
+  DIVUW,
+  REMUW
 };
 }
Index: lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- lib/Target/RISCV/RISCVISelLowering.cpp
+++ lib/Target/RISCV/RISCVISelLowering.cpp
@@ -80,7 +80,6 @@
     setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
 
   if (Subtarget.is64Bit()) {
-    setTargetDAGCombine(ISD::ANY_EXTEND);
     setOperationAction(ISD::SHL, MVT::i32, Custom);
     setOperationAction(ISD::SRA, MVT::i32, Custom);
     setOperationAction(ISD::SRL, MVT::i32, Custom);
@@ -96,6 +95,12 @@
     setOperationAction(ISD::UREM, XLenVT, Expand);
   }
 
+  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
+    setOperationAction(ISD::SDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UDIV, MVT::i32, Custom);
+    setOperationAction(ISD::UREM, MVT::i32, Custom);
+  }
+
   setOperationAction(ISD::SDIVREM, XLenVT, Expand);
   setOperationAction(ISD::UDIVREM, XLenVT, Expand);
   setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
@@ -524,6 +529,12 @@
     return RISCVISD::SRAW;
   case ISD::SRL:
     return RISCVISD::SRLW;
+  case ISD::SDIV:
+    return RISCVISD::DIVW;
+  case ISD::UDIV:
+    return RISCVISD::DIVUW;
+  case ISD::UREM:
+    return RISCVISD::REMUW;
   }
 }
 
@@ -558,46 +569,24 @@
       return;
     Results.push_back(customLegalizeToWOp(N, DAG));
     break;
-  }
-}
-
-// Returns true if the given node is an sdiv, udiv, or urem with non-constant
-// operands.
-static bool isVariableSDivUDivURem(SDValue Val) {
-  switch (Val.getOpcode()) {
-  default:
-    return false;
   case ISD::SDIV:
   case ISD::UDIV:
   case ISD::UREM:
-    return Val.getOperand(0).getOpcode() != ISD::Constant &&
-           Val.getOperand(1).getOpcode() != ISD::Constant;
+    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+           Subtarget.hasStdExtM() && "Unexpected custom legalisation");
+    if (N->getOperand(0).getOpcode() == ISD::Constant ||
+        N->getOperand(1).getOpcode() == ISD::Constant)
+      return;
+    Results.push_back(customLegalizeToWOp(N, DAG));
+    break;
   }
 }
 
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
-  SelectionDAG &DAG = DCI.DAG;
-
   switch (N->getOpcode()) {
   default:
     break;
-  case ISD::ANY_EXTEND: {
-    // If any-extending an i32 sdiv/udiv/urem to i64, then instead sign-extend
-    // in order to increase the chance of being able to select the
-    // divw/divuw/remuw instructions.
-    SDValue Src = N->getOperand(0);
-    if (N->getValueType(0) != MVT::i64 || Src.getValueType() != MVT::i32)
-      break;
-    if (!(Subtarget.hasStdExtM() && isVariableSDivUDivURem(Src)))
-      break;
-    SDLoc DL(N);
-    // Don't add the new node to the DAGCombiner worklist, in order to avoid
-    // an infinite cycle due to SimplifyDemandedBits converting the
-    // SIGN_EXTEND back to ANY_EXTEND.
-    return DCI.CombineTo(N, DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src),
-                         false);
-  }
   case RISCVISD::SplitF64: {
     // If the input to SplitF64 is just BuildPairF64 then the operation is
     // redundant. Instead, use BuildPairF64's operands directly.
@@ -633,6 +622,9 @@
   case RISCVISD::SLLW:
   case RISCVISD::SRAW:
   case RISCVISD::SRLW:
+  case RISCVISD::DIVW:
+  case RISCVISD::DIVUW:
+  case RISCVISD::REMUW:
     // TODO: As the result is sign-extended, this is conservatively correct. A
     // more precise answer could be calculated for SRAW depending on known
     // bits in the shift amount.
@@ -1736,6 +1728,12 @@
     return "RISCVISD::SRAW";
   case RISCVISD::SRLW:
     return "RISCVISD::SRLW";
+  case RISCVISD::DIVW:
+    return "RISCVISD::DIVW";
+  case RISCVISD::DIVUW:
+    return "RISCVISD::DIVUW";
+  case RISCVISD::REMUW:
+    return "RISCVISD::REMUW";
   }
   return nullptr;
 }
Index: lib/Target/RISCV/RISCVInstrInfoM.td
===================================================================
--- lib/Target/RISCV/RISCVInstrInfoM.td
+++ lib/Target/RISCV/RISCVInstrInfoM.td
@@ -11,6 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_divw  : SDNode<"RISCVISD::DIVW",  SDTIntBinOp>;
+def riscv_divuw : SDNode<"RISCVISD::DIVUW", SDTIntBinOp>;
+def riscv_remuw : SDNode<"RISCVISD::REMUW", SDTIntBinOp>;
+
 //===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
@@ -52,18 +60,19 @@
 let Predicates = [HasStdExtM, IsRV64] in {
 def : Pat<(sext_inreg (mul GPR:$rs1, GPR:$rs2), i32),
           (MULW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (sdiv (sexti32 GPR:$rs1),
-                            (sexti32 GPR:$rs2)), i32),
-          (DIVW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(zexti32 (sdiv (sexti32 GPR:$rs1),
-                         (sexti32 GPR:$rs2))),
-          (SRLI (SLLI (DIVW GPR:$rs1, GPR:$rs2), 32), 32)>;
-def : Pat<(sext_inreg (udiv (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (DIVUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a divuw and zero-extend the result than to
-// zero-extend both inputs to a udiv.
-def : Pat<(udiv (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (DIVUW GPR:$rs1, GPR:$rs2), 32), 32)>;
+
+def : PatGprGpr<riscv_divw, DIVW>;
+def : PatGprGpr<riscv_divuw, DIVUW>;
+def : PatGprGpr<riscv_remuw, REMUW>;
+
+// Handle the specific cases where using DIVU/REMU would be correct and result
+// in fewer instructions than emitting DIVUW/REMUW then zero-extending the
+// result.
+def : Pat<(zexti32 (riscv_divuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (DIVU GPR:$rs1, GPR:$rs2)>;
+def : Pat<(zexti32 (riscv_remuw (zexti32 GPR:$rs1), (zexti32 GPR:$rs2))),
+          (REMU GPR:$rs1, GPR:$rs2)>;
+
 // Although the sexti32 operands may not have originated from an i32 srem,
 // this pattern is safe as it is impossible for two sign extended inputs to
 // produce a result where res[63:32]=0 and res[31]=1.
@@ -72,10 +81,4 @@
 def : Pat<(sext_inreg (srem (sexti32 GPR:$rs1), (sexti32 GPR:$rs2)), i32),
           (REMW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(sext_inreg (urem (zexti32 GPR:$rs1), (zexti32 GPR:$rs2)), i32),
-          (REMUW GPR:$rs1, GPR:$rs2)>;
-// It's cheaper to perform a remuw and zero-extend the result than to
-// zero-extend both inputs to a urem.
-def : Pat<(urem (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff)),
-          (SRLI (SLLI (REMUW GPR:$rs1, GPR:$rs2), 32), 32)>;
 } // Predicates = [HasStdExtM, IsRV64]
Index: test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll
===================================================================
--- test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll
+++ test/CodeGen/RISCV/rv64m-exhaustive-w-insts.ll
@@ -454,9 +454,9 @@
 define zeroext i32 @zext_divuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_aext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -487,9 +487,9 @@
 define zeroext i32 @zext_divuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_sext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    divuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    divu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -498,9 +498,9 @@
 define zeroext i32 @zext_divuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -509,9 +509,9 @@
 define zeroext i32 @zext_divuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_divuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    divu a0, a0, a1
+; RV64IM-NEXT:    divuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = udiv i32 %a, %b
   ret i32 %1
@@ -1235,9 +1235,9 @@
 define zeroext i32 @zext_remuw_aext_zext(i32 %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_aext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1268,9 +1268,9 @@
 define zeroext i32 @zext_remuw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_sext_zext:
 ; RV64IM:       # %bb.0:
+; RV64IM-NEXT:    remuw a0, a0, a1
 ; RV64IM-NEXT:    slli a0, a0, 32
 ; RV64IM-NEXT:    srli a0, a0, 32
-; RV64IM-NEXT:    remu a0, a0, a1
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1279,9 +1279,9 @@
 define zeroext i32 @zext_remuw_zext_aext(i32 zeroext %a, i32 %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_aext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
@@ -1290,9 +1290,9 @@
 define zeroext i32 @zext_remuw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind {
 ; RV64IM-LABEL: zext_remuw_zext_sext:
 ; RV64IM:       # %bb.0:
-; RV64IM-NEXT:    slli a1, a1, 32
-; RV64IM-NEXT:    srli a1, a1, 32
-; RV64IM-NEXT:    remu a0, a0, a1
+; RV64IM-NEXT:    remuw a0, a0, a1
+; RV64IM-NEXT:    slli a0, a0, 32
+; RV64IM-NEXT:    srli a0, a0, 32
 ; RV64IM-NEXT:    ret
   %1 = urem i32 %a, %b
   ret i32 %1
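For reference, a minimal IR sketch of the central case this patch improves (illustrative only, not part of the patch; the function name @udiv32_zext is hypothetical and mirrors the test functions above):

define zeroext i32 @udiv32_zext(i32 %a, i32 %b) nounwind {
  ; With this change, RV64IM codegen should select divuw followed by
  ; slli/srli to zero-extend the result, rather than zero-extending both
  ; inputs and dividing with divu.
  %1 = udiv i32 %a, %b
  ret i32 %1
}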