diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -583,6 +583,8 @@
                          unsigned ExtendOpc) const;
   SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        SmallVectorImpl<SDNode *> &Created) const override;
   SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2162,6 +2162,64 @@
                       Store->getMemOperand()->getFlags());
 }
 
+SDValue
+RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                   SelectionDAG &DAG,
+                                   SmallVectorImpl<SDNode *> &Created) const {
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (isIntDivCheap(N->getValueType(0), Attr))
+    return SDValue(N, 0); // Lower SDIV as SDIV
+
+  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
+         "Unexpected divisor!");
+
+  // Only perform this transform if Zbt is supported; otherwise the select
+  // below will become a branch.
+  if (!Subtarget.hasStdExtZbt())
+    return SDValue();
+
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  // Only support i64 in RV64.
+  if (!(VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
+        (VT == MVT::i64 && Subtarget.is64Bit())))
+    return SDValue();
+
+  unsigned Lg2 = Divisor.countTrailingZeros();
+
+  // If the divisor is 2 or -2, or if (Pow2 - 1) does not fit in a 12-bit
+  // signed immediate, the default expansion is better.
+  if (Lg2 == 1 || Lg2 > 11)
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+  SDValue Cmp = DAG.getSetCC(DL, VT, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                             SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
diff --git a/llvm/test/CodeGen/RISCV/sdiv-pow2.ll b/llvm/test/CodeGen/RISCV/sdiv-pow2.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/sdiv-pow2.ll
@@ -0,0 +1,367 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV32B %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64B %s
+
+define i32 @foo1(i32 %a) {
+; RV32I-LABEL: foo1:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 29
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 3
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo1:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: slti a1, a0, 0
+; RV32B-NEXT: addi a2, a0, 7
+; RV32B-NEXT: cmov a0, a1, a2, a0
+; RV32B-NEXT: srai a0, a0, 3
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo1:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 29
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 3
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo1:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: sext.w a1, a0
+; RV64B-NEXT: addi a2, a0, 7
+; RV64B-NEXT: slti a1, a1, 0
+; RV64B-NEXT: cmov a0, a1, a2, a0
+; RV64B-NEXT: sraiw a0, a0, 3
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 8
+  ret i32 %div
+}
+
+define i32 @foo2(i32 %a) {
+; RV32I-LABEL: foo2:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 29
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 3
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo2:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: slti a1, a0, 0
+; RV32B-NEXT: addi a2, a0, 7
+; RV32B-NEXT: cmov a0, a1, a2, a0
+; RV32B-NEXT: srai a0, a0, 3
+; RV32B-NEXT: neg a0, a0
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo2:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 29
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 3
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo2:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: sext.w a1, a0
+; RV64B-NEXT: addi a2, a0, 7
+; RV64B-NEXT: slti a1, a1, 0
+; RV64B-NEXT: cmov a0, a1, a2, a0
+; RV64B-NEXT: sraiw a0, a0, 3
+; RV64B-NEXT: neg a0, a0
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -8
+  ret i32 %div
+}
+
+define i64 @foo3(i64 %a) {
+; RV32I-LABEL: foo3:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 29
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 3
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 29
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 3
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo3:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a2, a1, 31
+; RV32B-NEXT: srli a2, a2, 29
+; RV32B-NEXT: add a2, a0, a2
+; RV32B-NEXT: sltu a0, a2, a0
+; RV32B-NEXT: add a1, a1, a0
+; RV32B-NEXT: fsri a0, a2, a1, 3
+; RV32B-NEXT: srai a1, a1, 3
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo3:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 3
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo3:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: slti a1, a0, 0
+; RV64B-NEXT: addi a2, a0, 7
+; RV64B-NEXT: cmov a0, a1, a2, a0
+; RV64B-NEXT: srai a0, a0, 3
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 8
+  ret i64 %div
+}
+
+define i64 @foo4(i64 %a) {
+; RV32I-LABEL: foo4:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 29
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 3
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 29
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 3
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo4:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a2, a1, 31
+; RV32B-NEXT: srli a2, a2, 29
+; RV32B-NEXT: add a2, a0, a2
+; RV32B-NEXT: sltu a0, a2, a0
+; RV32B-NEXT: add a1, a1, a0
+; RV32B-NEXT: fsri a2, a2, a1, 3
+; RV32B-NEXT: neg a0, a2
+; RV32B-NEXT: snez a2, a2
+; RV32B-NEXT: srai a1, a1, 3
+; RV32B-NEXT: add a1, a1, a2
+; RV32B-NEXT: neg a1, a1
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo4:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 61
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 3
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo4:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: slti a1, a0, 0
+; RV64B-NEXT: addi a2, a0, 7
+; RV64B-NEXT: cmov a0, a1, a2, a0
+; RV64B-NEXT: srai a0, a0, 3
+; RV64B-NEXT: neg a0, a0
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -8
+  ret i64 %div
+}
+
+define i32 @foo5(i32 %a) {
+; RV32I-LABEL: foo5:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 20
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 12
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo5:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a1, a0, 31
+; RV32B-NEXT: srli a1, a1, 20
+; RV32B-NEXT: add a0, a0, a1
+; RV32B-NEXT: srai a0, a0, 12
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo5:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 12
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo5:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: sraiw a1, a0, 31
+; RV64B-NEXT: srliw a1, a1, 20
+; RV64B-NEXT: addw a0, a0, a1
+; RV64B-NEXT: sraiw a0, a0, 12
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i32 %a, 4096
+  ret i32 %div
+}
+
+define i32 @foo6(i32 %a) {
+; RV32I-LABEL: foo6:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a1, a0, 31
+; RV32I-NEXT: srli a1, a1, 20
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srai a0, a0, 12
+; RV32I-NEXT: neg a0, a0
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo6:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a1, a0, 31
+; RV32B-NEXT: srli a1, a1, 20
+; RV32B-NEXT: add a0, a0, a1
+; RV32B-NEXT: srai a0, a0, 12
+; RV32B-NEXT: neg a0, a0
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo6:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: sraiw a1, a0, 31
+; RV64I-NEXT: srliw a1, a1, 20
+; RV64I-NEXT: addw a0, a0, a1
+; RV64I-NEXT: sraiw a0, a0, 12
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo6:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: sraiw a1, a0, 31
+; RV64B-NEXT: srliw a1, a1, 20
+; RV64B-NEXT: addw a0, a0, a1
+; RV64B-NEXT: sraiw a0, a0, 12
+; RV64B-NEXT: neg a0, a0
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i32 %a, -4096
+  ret i32 %div
+}
+
+define i64 @foo7(i64 %a) {
+; RV32I-LABEL: foo7:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 20
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 12
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 20
+; RV32I-NEXT: or a0, a3, a0
+; RV32I-NEXT: srai a1, a1, 12
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo7:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a2, a1, 31
+; RV32B-NEXT: srli a2, a2, 20
+; RV32B-NEXT: add a2, a0, a2
+; RV32B-NEXT: sltu a0, a2, a0
+; RV32B-NEXT: add a1, a1, a0
+; RV32B-NEXT: fsri a0, a2, a1, 12
+; RV32B-NEXT: srai a1, a1, 12
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo7:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 52
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 12
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo7:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: srai a1, a0, 63
+; RV64B-NEXT: srli a1, a1, 52
+; RV64B-NEXT: add a0, a0, a1
+; RV64B-NEXT: srai a0, a0, 12
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i64 %a, 4096
+  ret i64 %div
+}
+
+define i64 @foo8(i64 %a) {
+; RV32I-LABEL: foo8:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: srai a2, a1, 31
+; RV32I-NEXT: srli a2, a2, 20
+; RV32I-NEXT: add a2, a0, a2
+; RV32I-NEXT: srli a3, a2, 12
+; RV32I-NEXT: sltu a0, a2, a0
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: slli a0, a1, 20
+; RV32I-NEXT: or a2, a3, a0
+; RV32I-NEXT: neg a0, a2
+; RV32I-NEXT: snez a2, a2
+; RV32I-NEXT: srai a1, a1, 12
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: neg a1, a1
+; RV32I-NEXT: ret
+;
+; RV32B-LABEL: foo8:
+; RV32B: # %bb.0: # %entry
+; RV32B-NEXT: srai a2, a1, 31
+; RV32B-NEXT: srli a2, a2, 20
+; RV32B-NEXT: add a2, a0, a2
+; RV32B-NEXT: sltu a0, a2, a0
+; RV32B-NEXT: add a1, a1, a0
+; RV32B-NEXT: fsri a2, a2, a1, 12
+; RV32B-NEXT: neg a0, a2
+; RV32B-NEXT: snez a2, a2
+; RV32B-NEXT: srai a1, a1, 12
+; RV32B-NEXT: add a1, a1, a2
+; RV32B-NEXT: neg a1, a1
+; RV32B-NEXT: ret
+;
+; RV64I-LABEL: foo8:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: srai a1, a0, 63
+; RV64I-NEXT: srli a1, a1, 52
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srai a0, a0, 12
+; RV64I-NEXT: neg a0, a0
+; RV64I-NEXT: ret
+;
+; RV64B-LABEL: foo8:
+; RV64B: # %bb.0: # %entry
+; RV64B-NEXT: srai a1, a0, 63
+; RV64B-NEXT: srli a1, a1, 52
+; RV64B-NEXT: add a0, a0, a1
+; RV64B-NEXT: srai a0, a0, 12
+; RV64B-NEXT: neg a0, a0
+; RV64B-NEXT: ret
+entry:
+  %div = sdiv i64 %a, -4096
+  ret i64 %div
+}
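
Not part of the patch: the comments in BuildSDIVPow2 above describe the classic sdiv-by-power-of-two expansion (conditionally add Pow2 - 1 to round a negative dividend toward zero, shift right arithmetically, then negate if the divisor was negative). The standalone C++ sketch below models that arithmetic for reference; the helper name sdiv_pow2_ref is invented for this note, and it assumes right shift of a negative value is an arithmetic shift (true on common targets and defined since C++20).

    #include <cassert>
    #include <cstdint>

    // Reference model of the slti/addi/cmov + srai (+ neg) sequence the patch
    // emits for a divisor of +/- 2^Lg2 with 2 <= Lg2 <= 11.
    static int32_t sdiv_pow2_ref(int32_t N0, unsigned Lg2, bool NegativeDivisor) {
      int32_t Pow2MinusOne = (int32_t{1} << Lg2) - 1;
      int32_t Adjusted = N0 < 0 ? N0 + Pow2MinusOne : N0; // slti + addi + cmov
      int32_t Quotient = Adjusted >> Lg2;                 // srai
      return NegativeDivisor ? -Quotient : Quotient;      // neg (sub zero, x)
    }

    int main() {
      // sdiv truncates toward zero, so -5 / 8 is 0; a plain arithmetic shift of
      // -5 by 3 would give -1, which is why the conditional add is needed.
      assert(sdiv_pow2_ref(-5, 3, false) == -5 / 8);    // as in @foo1
      assert(sdiv_pow2_ref(-5, 3, true) == -5 / -8);    // as in @foo2
      assert(sdiv_pow2_ref(100, 3, false) == 100 / 8);
      assert(sdiv_pow2_ref(-100, 5, false) == -100 / 32);
      return 0;
    }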