Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -2545,6 +2545,14 @@
   SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                     bool IsAfterLegalization,
                     std::vector<SDNode *> *Created) const;
+  /// Target hook for lowering (sdiv X, +/-pow2). The default returns an empty
+  /// SDValue, which makes DAGCombiner fall back to its generic expansion.
+  /// Overrides should append any intermediate nodes they create to \p Created.
+  virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                SelectionDAG &DAG,
+                                std::vector<SDNode *> *Created) const {
+    return SDValue();
+  }
 
   //===--------------------------------------------------------------------===//
   // Legalization utility functions
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -299,6 +299,7 @@
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
+    SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
@@ -2014,6 +2015,11 @@
     if (TLI.isPow2DivCheap())
       return SDValue();
 
+    // Target-specific implementation of sdiv x, pow2.
+    SDValue Res = BuildSDIVPow2(N);
+    if (Res.getNode())
+      return Res;
+
     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
 
     // Splat the sign bit into the register
@@ -11463,9 +11469,9 @@
   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
 }
 
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
-/// return a DAG expression to select that will generate the same value by
-/// multiplying by a magic number. See:
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant, return
+/// a DAG expression to select that will generate the same value by multiplying
+/// by a magic number. See:
 ///
 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -11485,6 +11491,28 @@
   return S;
 }
 
+/// BuildSDIVPow2 - Given an ISD::SDIV node expressing a divide by constant
+/// power of 2, ask the target (TLI.BuildSDIVPow2) for a DAG expression that
+/// generates the same value by right shifting. Returns an empty SDValue when
+/// the divisor is not a constant (or constant splat), is zero, or the target
+/// declines; nodes the target reports as created are added to the worklist.
+SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
+  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  // Avoid division by zero.
+  if (!C->getAPIntValue())
+    return SDValue();
+
+  std::vector<SDNode *> Built;
+  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
+
+  for (SDNode *Node : Built)
+    AddToWorklist(Node);
+  return S;
+}
+
 /// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number. See:
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -424,6 +424,9 @@
   SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+                        std::vector<SDNode *> *Created) const override;
+
   ConstraintType
   getConstraintType(const std::string &Constraint) const override;
   unsigned getRegisterByName(const char* RegName, EVT VT) const override;
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6382,6 +6382,54 @@
   return performIntegerAbsCombine(N, DAG);
 }
 
+/// Lower (sdiv X, +/-2^k) for i32/i64 to an add/cmp/csel/asr sequence, plus a
+/// negation when the divisor is negative. Returns an empty SDValue for other
+/// types or non-power-of-2 divisors. Intermediate nodes are appended to
+/// \p Created when it is non-null.
+SDValue
+AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
+                                     SelectionDAG &DAG,
+                                     std::vector<SDNode *> *Created) const {
+  // fold (sdiv X, pow2)
+  EVT VT = N->getValueType(0);
+  if ((VT != MVT::i32 && VT != MVT::i64) ||
+      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  unsigned Lg2 = Divisor.countTrailingZeros();
+  SDValue Zero = DAG.getConstant(0, VT);
+  // Build the mask in 64 bits: Lg2 can be up to 63 for i64, which would
+  // overflow a plain 'int' shift.
+  SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, VT);
+
+  // Add (N0 < 0) ? Pow2 - 1 : 0;
+  SDValue CCVal;
+  SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
+
+  if (Created) {
+    Created->push_back(Cmp.getNode());
+    Created->push_back(Add.getNode());
+    Created->push_back(CSel.getNode());
+  }
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, MVT::i64));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  if (Created)
+    Created->push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), SRA);
+}
+
 static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const AArch64Subtarget *Subtarget) {
Index: test/CodeGen/AArch64/sdivpow2.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/sdivpow2.ll
@@ -0,0 +1,61 @@
+; RUN: llc -mtriple=arm64-linux-gnu -o - %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+; CHECK-LABEL: test1
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #3
+  %div = sdiv i32 %x, 8
+  ret i32 %div
+}
+
+define i32 @test2(i32 %x) {
+; CHECK-LABEL: test2
+; CHECK: add w8, w0, #7
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: neg w0, w8, asr #3
+  %div = sdiv i32 %x, -8
+  ret i32 %div
+}
+
+define i32 @test3(i32 %x) {
+; CHECK-LABEL: test3
+; CHECK: add w8, w0, #31
+; CHECK: cmp w0, #0
+; CHECK: csel w8, w8, w0, lt
+; CHECK: asr w0, w8, #5
+  %div = sdiv i32 %x, 32
+  ret i32 %div
+}
+
+define i64 @test4(i64 %x) {
+; CHECK-LABEL: test4
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #3
+  %div = sdiv i64 %x, 8
+  ret i64 %div
+}
+
+define i64 @test5(i64 %x) {
+; CHECK-LABEL: test5
+; CHECK: add x8, x0, #7
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: neg x0, x8, asr #3
+  %div = sdiv i64 %x, -8
+  ret i64 %div
+}
+
+define i64 @test6(i64 %x) {
+; CHECK-LABEL: test6
+; CHECK: add x8, x0, #63
+; CHECK: cmp x0, #0
+; CHECK: csel x8, x8, x0, lt
+; CHECK: asr x0, x8, #6
+  %div = sdiv i64 %x, 64
+  ret i64 %div
+}