diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -516,6 +516,9 @@
 
   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
 
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        SmallVectorImpl<SDNode *> &Created) const override;
+
 private:
   /// RISCVCCAssignFn - This target-specific function extends the default
   /// CCValAssign with additional information used to lower RISC-V calling
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10088,6 +10088,52 @@
   return SDValue();
 }
 
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                   SelectionDAG &DAG,
+                                   SmallVectorImpl<SDNode *> &Created) const {
+  // This combine is only enabled when the Zbt extension is available.
+  if (!Subtarget.hasStdExtZbt())
+    return SDValue();
+
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV.
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if ((VT != MVT::i32 && VT != MVT::i64) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // Add (N0 < 0) ? Pow2 - 1 : 0;
+  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(Sel.getNode());
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+}
+
 #define GET_REGISTER_MATCHER
 #include "RISCVGenAsmMatcher.inc"
 
diff --git a/llvm/test/CodeGen/RISCV/rv32zbt-div-pow2.ll b/llvm/test/CodeGen/RISCV/rv32zbt-div-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbt-div-pow2.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBT
+
+define i32 @sdiv_pow2(i32 %a) {
+; RV32I-LABEL: sdiv_pow2:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    srai a1, a0, 31
+; RV32I-NEXT:    srli a1, a1, 22
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srai a0, a0, 10
+; RV32I-NEXT:    neg a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBT-LABEL: sdiv_pow2:
+; RV32ZBT:       # %bb.0: # %entry
+; RV32ZBT-NEXT:    slti a1, a0, 0
+; RV32ZBT-NEXT:    addi a2, a0, 1023
+; RV32ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV32ZBT-NEXT:    srai a0, a0, 10
+; RV32ZBT-NEXT:    neg a0, a0
+; RV32ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -1024
+  ret i32 %div
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbt-div-pow2.ll b/llvm/test/CodeGen/RISCV/rv64zbt-div-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbt-div-pow2.ll
@@ -0,0 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBT
+
+define i32 @sdiv_pow2_32(i32 %a) {
+; RV64I-LABEL: sdiv_pow2_32:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    sraiw a1, a0, 31
+; RV64I-NEXT:    srliw a1, a1, 22
+; RV64I-NEXT:    addw a0, a0, a1
+; RV64I-NEXT:    sraiw a0, a0, 10
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv_pow2_32:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    sext.w a1, a0
+; RV64ZBT-NEXT:    addi a2, a0, 1023
+; RV64ZBT-NEXT:    slti a1, a1, 0
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    sraiw a0, a0, 10
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i32 %a, -1024
+  ret i32 %div
+}
+
+define i64 @sdiv_pow2_64(i64 %a) {
+; RV64I-LABEL: sdiv_pow2_64:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    srai a1, a0, 63
+; RV64I-NEXT:    srli a1, a1, 54
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    srai a0, a0, 10
+; RV64I-NEXT:    neg a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBT-LABEL: sdiv_pow2_64:
+; RV64ZBT:       # %bb.0: # %entry
+; RV64ZBT-NEXT:    slti a1, a0, 0
+; RV64ZBT-NEXT:    addi a2, a0, 1023
+; RV64ZBT-NEXT:    cmov a0, a1, a2, a0
+; RV64ZBT-NEXT:    srai a0, a0, 10
+; RV64ZBT-NEXT:    neg a0, a0
+; RV64ZBT-NEXT:    ret
+entry:
+  %div = sdiv i64 %a, -1024
+  ret i64 %div
+}