diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -528,6 +528,9 @@
   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        SmallVectorImpl<SDNode *> &Created) const override;
+
 private:
   /// RISCVCCAssignFn - This target-specific function extends the default
   /// CCValAssign with additional information used to lower RISC-V calling
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10350,6 +10350,60 @@
   return SDValue();
 }
 
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                   SelectionDAG &DAG,
+                                   SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // A conditional move is needed, so do the transformation only if Zbt is
+  // enabled.
+  if (!Subtarget.hasStdExtZbt())
+    return SDValue();
+
+  // When |Divisor| >= 2^12, the transformation isn't profitable; and when
+  // dividing by 2, the select sequence lengthens the critical path. Keep the
+  // original DAG in both cases.
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  if (Lg2 == 1 || Lg2 >= 12)
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32 && !(Subtarget.is64Bit() && VT == MVT::i64))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
+
+  // Add (N0 < 0) ? Pow2 - 1 : 0 to N0.
+  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue Sel = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(Sel.getNode());
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, Sel, DAG.getConstant(Lg2, DL, VT));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
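+  // Illustrative values (assumed for this note, not taken from the patch):
+  // with N0 = -20 and Divisor = 8 (Lg2 = 3), Sel = -20 + 7 = -13 and
+  // SRA = -13 >> 3 = -2, matching truncating division; for Divisor = -8 the
+  // SUB below then yields 2.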
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+}
+
 #define GET_REGISTER_MATCHER
 #include "RISCVGenAsmMatcher.inc"
diff --git a/llvm/test/CodeGen/RISCV/div-pow2.ll b/llvm/test/CodeGen/RISCV/div-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/div-pow2.ll
@@ -0,0 +1,809 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32ZBT
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64ZBT
+
+define i32 @sdiv32_pow2_2(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a1, a0, 31
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_2:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a1, a0, 31
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srliw a1, a0, 31
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 1
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_2:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srliw a1, a0, 31
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 1
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 2
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_2(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a1, a0, 31
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_2:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a1, a0, 31
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 1
+; RV32ZBT-NEXT: neg a0, a0
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srliw a1, a0, 31
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 1
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_2:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srliw a1, a0, 31
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 1
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -2
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_2048(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_2048:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 21
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 11
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_2048:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: slti a1, a0, 0
+; RV32ZBT-NEXT: addi a2, a0, 2047
+; RV32ZBT-NEXT: cmov a0, a1, a2, a0
+; RV32ZBT-NEXT: srai a0, a0, 11
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_2048:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 21
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 11
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_2048:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sext.w a1, a0
+; RV64ZBT-NEXT: addi a2, a0, 2047
+; RV64ZBT-NEXT: slti a1, a1, 0
+; RV64ZBT-NEXT: cmov a0, a1, a2, a0
+; RV64ZBT-NEXT: sraiw a0, a0, 11
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 2048
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_2048(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_2048:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 21
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 11
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_2048:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: slti a1, a0, 0
+; RV32ZBT-NEXT: addi a2, a0, 2047
+; RV32ZBT-NEXT: cmov a0, a1, a2, a0
+; RV32ZBT-NEXT: srai a0, a0, 11
+; RV32ZBT-NEXT: neg a0, a0
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_2048:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 21
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 11
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_2048:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sext.w a1, a0
+; RV64ZBT-NEXT: addi a2, a0, 2047
+; RV64ZBT-NEXT: slti a1, a1, 0
+; RV64ZBT-NEXT: cmov a0, a1, a2, a0
+; RV64ZBT-NEXT: sraiw a0, a0, 11
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -2048
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_4096(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_4096:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 20
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 12
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_4096:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a1, a0, 31
+; RV32ZBT-NEXT: srli a1, a1, 20
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 12
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_4096:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 12
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_4096:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sraiw a1, a0, 31
+; RV64ZBT-NEXT: srliw a1, a1, 20
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 12
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 4096
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_4096(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_4096:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 20
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 12
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_4096:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a1, a0, 31
+; RV32ZBT-NEXT: srli a1, a1, 20
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 12
+; RV32ZBT-NEXT: neg a0, a0
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_4096:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 12
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_4096:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sraiw a1, a0, 31
+; RV64ZBT-NEXT: srliw a1, a1, 20
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 12
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -4096
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_65536(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_65536:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_65536:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a1, a0, 31
+; RV32ZBT-NEXT: srli a1, a1, 16
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 16
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_65536:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 16
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_65536:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sraiw a1, a0, 31
+; RV64ZBT-NEXT: srliw a1, a1, 16
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 16
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 65536
+  ret i32 %div
+}
+
+define i32 @sdiv32_pow2_negative_65536(i32 %a) {
+; RV32I-LABEL: sdiv32_pow2_negative_65536:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 16
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv32_pow2_negative_65536:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a1, a0, 31
+; RV32ZBT-NEXT: srli a1, a1, 16
+; RV32ZBT-NEXT: add a0, a0, a1
+; RV32ZBT-NEXT: srai a0, a0, 16
+; RV32ZBT-NEXT: neg a0, a0
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv32_pow2_negative_65536:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 16
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 16
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv32_pow2_negative_65536:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: sraiw a1, a0, 31
+; RV64ZBT-NEXT: srliw a1, a1, 16
+; RV64ZBT-NEXT: addw a0, a0, a1
+; RV64ZBT-NEXT: sraiw a0, a0, 16
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -65536
+  ret i32 %div
+}
+
+define i64 @sdiv64_pow2_2(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a2, a1, 31
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 31
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_2:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a2, a1, 31
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a0, a2, a1, 1
+; RV32ZBT-NEXT: srai a1, a1, 1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srli a1, a0, 63
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 1
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_2:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srli a1, a0, 63
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 1
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 2
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_2(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a2, a1, 31
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 1
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 31
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 1
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_2:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a2, a1, 31
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a2, a2, a1, 1
+; RV32ZBT-NEXT: neg a0, a2
+; RV32ZBT-NEXT: snez a2, a2
+; RV32ZBT-NEXT: srai a1, a1, 1
+; RV32ZBT-NEXT: add a1, a1, a2
+; RV32ZBT-NEXT: neg a1, a1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srli a1, a0, 63
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 1
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_2:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srli a1, a0, 63
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 1
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -2
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_2048(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_2048:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 21
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 11
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 21
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 11
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_2048:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 21
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a0, a2, a1, 11
+; RV32ZBT-NEXT: srai a1, a1, 11
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_2048:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 53
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 11
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_2048:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: slti a1, a0, 0
+; RV64ZBT-NEXT: addi a2, a0, 2047
+; RV64ZBT-NEXT: cmov a0, a1, a2, a0
+; RV64ZBT-NEXT: srai a0, a0, 11
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 2048
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_2048(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_2048:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 21
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 11
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 21
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 11
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_2048:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 21
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a2, a2, a1, 11
+; RV32ZBT-NEXT: neg a0, a2
+; RV32ZBT-NEXT: snez a2, a2
+; RV32ZBT-NEXT: srai a1, a1, 11
+; RV32ZBT-NEXT: add a1, a1, a2
+; RV32ZBT-NEXT: neg a1, a1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_2048:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 53
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 11
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_2048:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: slti a1, a0, 0
+; RV64ZBT-NEXT: addi a2, a0, 2047
+; RV64ZBT-NEXT: cmov a0, a1, a2, a0
+; RV64ZBT-NEXT: srai a0, a0, 11
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -2048
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_4096(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_4096:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 20
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 12
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 20
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 12
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_4096:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 20
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a0, a2, a1, 12
+; RV32ZBT-NEXT: srai a1, a1, 12
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_4096:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 52
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 12
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_4096:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 52
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 12
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 4096
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_4096(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_4096:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 20
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 12
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 20
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 12
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_4096:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 20
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a2, a2, a1, 12
+; RV32ZBT-NEXT: neg a0, a2
+; RV32ZBT-NEXT: snez a2, a2
+; RV32ZBT-NEXT: srai a1, a1, 12
+; RV32ZBT-NEXT: add a1, a1, a2
+; RV32ZBT-NEXT: neg a1, a1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_4096:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 52
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 12
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_4096:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 52
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 12
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -4096
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_65536(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_65536:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 16
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 16
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 16
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_65536:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 16
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a0, a2, a1, 16
+; RV32ZBT-NEXT: srai a1, a1, 16
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_65536:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 48
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 16
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_65536:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 48
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 16
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 65536
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_65536(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_65536:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 16
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 16
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 16
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 16
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_65536:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srai a2, a1, 31
+; RV32ZBT-NEXT: srli a2, a2, 16
+; RV32ZBT-NEXT: add a2, a0, a2
+; RV32ZBT-NEXT: sltu a0, a2, a0
+; RV32ZBT-NEXT: add a1, a1, a0
+; RV32ZBT-NEXT: fsri a2, a2, a1, 16
+; RV32ZBT-NEXT: neg a0, a2
+; RV32ZBT-NEXT: snez a2, a2
+; RV32ZBT-NEXT: srai a1, a1, 16
+; RV32ZBT-NEXT: add a1, a1, a2
+; RV32ZBT-NEXT: neg a1, a1
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_65536:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 48
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 16
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_65536:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 48
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 16
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -65536
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_8589934592(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_8589934592:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a2, a1, 31
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: add a1, a2, a0
+; RV32I-NEXT: srai a0, a1, 1
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_8589934592:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a2, a1, 31
+; RV32ZBT-NEXT: add a2, a1, a2
+; RV32ZBT-NEXT: srai a1, a1, 31
+; RV32ZBT-NEXT: fsri a1, a1, a1, 31
+; RV32ZBT-NEXT: add a1, a0, a1
+; RV32ZBT-NEXT: sltu a0, a1, a0
+; RV32ZBT-NEXT: add a1, a2, a0
+; RV32ZBT-NEXT: srai a0, a1, 1
+; RV32ZBT-NEXT: srai a1, a1, 31
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_8589934592:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 31
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 33
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_8589934592:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 31
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 33
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 8589934592 ; 2^33
+  ret i64 %div
+}
+
+define i64 @sdiv64_pow2_negative_8589934592(i64 %a) {
+; RV32I-LABEL: sdiv64_pow2_negative_8589934592:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srli a2, a1, 31
+; RV32I-NEXT: add a2, a1, a2
+; RV32I-NEXT: srai a1, a1, 31
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: sltu a0, a1, a0
+; RV32I-NEXT: add a0, a2, a0
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srai a0, a0, 1
+; RV32I-NEXT: snez a2, a0
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32ZBT-LABEL: sdiv64_pow2_negative_8589934592:
+; RV32ZBT: # %bb.0: # %entry
+; RV32ZBT-NEXT: srli a2, a1, 31
+; RV32ZBT-NEXT: add a2, a1, a2
+; RV32ZBT-NEXT: srai a1, a1, 31
+; RV32ZBT-NEXT: fsri a1, a1, a1, 31
+; RV32ZBT-NEXT: add a1, a0, a1
+; RV32ZBT-NEXT: sltu a0, a1, a0
+; RV32ZBT-NEXT: add a0, a2, a0
+; RV32ZBT-NEXT: srai a1, a0, 31
+; RV32ZBT-NEXT: srai a0, a0, 1
+; RV32ZBT-NEXT: snez a2, a0
+; RV32ZBT-NEXT: add a1, a1, a2
+; RV32ZBT-NEXT: neg a1, a1
+; RV32ZBT-NEXT: neg a0, a0
+; RV32ZBT-NEXT: ret
+;
+; RV64I-LABEL: sdiv64_pow2_negative_8589934592:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 31
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 33
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBT-LABEL: sdiv64_pow2_negative_8589934592:
+; RV64ZBT: # %bb.0: # %entry
+; RV64ZBT-NEXT: srai a1, a0, 63
+; RV64ZBT-NEXT: srli a1, a1, 31
+; RV64ZBT-NEXT: add a0, a0, a1
+; RV64ZBT-NEXT: srai a0, a0, 33
+; RV64ZBT-NEXT: neg a0, a0
+; RV64ZBT-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -8589934592 ; -2^33
+  ret i64 %div
+}
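+
+; The DAG combine exercised above corresponds to the following IR-level
+; expansion (illustrative sketch for a divisor of 8; this function name and
+; body are assumptions for explanation, not part of the generated checks):
+;   define i32 @sdiv32_by_8_sketch(i32 %a) {
+;     %neg = icmp slt i32 %a, 0
+;     %add = add i32 %a, 7
+;     %sel = select i1 %neg, i32 %add, i32 %a  ; round toward zero
+;     %div = ashr i32 %sel, 3
+;     ret i32 %div
+;   }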