Index: include/llvm/Target/TargetLowering.h =================================================================== --- include/llvm/Target/TargetLowering.h +++ include/llvm/Target/TargetLowering.h @@ -2993,6 +2993,23 @@ // Legalization utility functions // + /// Expand a MUL or UMUL_LOHI of n-bit values into two or four nodes, + /// respectively, each computing an n/2-bit part of the result. + /// \param Result A vector that will be filled with the parts of the result + /// in little-endian order. + /// \param HalfVT The value type to use for the result nodes. + /// \param LL Low bits of the LHS of the MUL. You can use this parameter + /// if you want to control how low bits are extracted from the LHS. + /// \param LH High bits of the LHS of the MUL. See LL for meaning. + /// \param RL Low bits of the RHS of the MUL. See LL for meaning. + /// \param RH High bits of the RHS of the MUL. See LL for meaning. + /// \returns true if the node has been expanded, false if it has not. + bool expandUMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, + SDValue RHS, SmallVectorImpl<SDValue> &Result, + EVT HalfVT, SelectionDAG &DAG, SDValue LL = SDValue(), + SDValue LH = SDValue(), SDValue RL = SDValue(), + SDValue RH = SDValue()) const; + /// Expand a MUL into two nodes. One that computes the high bits of /// the result and one that computes the low bits. /// \param HiLoVT The value type to use for the Lo and Hi nodes. 
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3306,11 +3306,36 @@ ISD::SMUL_LOHI; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + bool HasExpandOpcode = TLI.isOperationLegalOrCustom(ExpandOpcode, VT); + assert((HasExpandOpcode || ExpandOpcode == ISD::UMUL_LOHI) && "If this wasn't legal, it shouldn't have been created!"); - Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), - Node->getOperand(1)); - Results.push_back(Tmp1.getValue(1)); + + if (HasExpandOpcode) { + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT)) { + SmallVector<SDValue, 4> Halves; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.expandUMUL_LOHI(ISD::UMUL_LOHI, VT, Node, Node->getOperand(0), + Node->getOperand(1), Halves, HalfType, DAG)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2]); + SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[3]); + SDValue Shift = DAG.getConstant( + HalfType.getSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + break; + } + } + break; } case ISD::MUL: { Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2965,9 +2965,13 @@ } // Multiply the numerator (operand 0) by the magic value - // FIXME: 
We should support doing a MUL in a wider type - if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : - isOperationLegalOrCustom(ISD::MULHU, VT)) + // FIXME: Support expansion of MULHU for vector types + if (IsAfterLegalization + ? isOperationLegal(ISD::MULHU, VT) + : (isOperationLegalOrCustom(ISD::MULHU, VT) || + (VT.isScalarInteger() && + isOperationLegalOrCustom( + ISD::MULHU, VT.getHalfSizedIntegerVT(*DAG.getContext()))))) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) @@ -3015,110 +3019,185 @@ // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, - SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { - EVT VT = N->getValueType(0); - SDLoc dl(N); - - bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); - bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); - bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); - bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); +bool TargetLowering::expandUMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, + SDValue LHS, SDValue RHS, + SmallVectorImpl<SDValue> &Result, + EVT HalfVT, SelectionDAG &DAG, SDValue LL, + SDValue LH, SDValue RL, SDValue RH) const { + assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI); + + bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HalfVT); + bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HalfVT); + bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HalfVT); + bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HalfVT); if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { unsigned OuterBitSize = VT.getSizeInBits(); - unsigned InnerBitSize = HiLoVT.getSizeInBits(); - unsigned LHSSB = 
DAG.ComputeNumSignBits(N->getOperand(0)); - unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + unsigned InnerBitSize = HalfVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(LHS); + unsigned RHSSB = DAG.ComputeNumSignBits(RHS); // LL, LH, RL, and RH must be either all NULL or all set to a value. assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); if (!LL.getNode() && !RL.getNode() && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { - LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); - RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); + isOperationLegalOrCustom(ISD::TRUNCATE, HalfVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, LHS); + RL = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, RHS); } if (!LL.getNode()) return false; APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); - if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && - DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + if (DAG.MaskedValueIsZero(LHS, HighMask) && + DAG.MaskedValueIsZero(RHS, HighMask)) { // The inputs are both zero-extended. + bool Expanded = false; if (HasUMUL_LOHI) { // We can emit a umul_lohi. - Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); - return true; - } - if (HasMULHU) { + SDValue Mul = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LL, RL); + Result.push_back(Mul); + Result.push_back(SDValue(Mul.getNode(), 1)); + Expanded = true; + } else if (HasMULHU) { // We can emit a mulhu+mul. 
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + Result.push_back(DAG.getNode(ISD::MUL, dl, HalfVT, LL, RL)); + Result.push_back(DAG.getNode(ISD::MULHU, dl, HalfVT, LL, RL)); + Expanded = true; + } + if (Expanded) { + if (Opcode != ISD::MUL) { + SDValue Zero = DAG.getConstant(0, dl, HalfVT); + Result.push_back(Zero); + Result.push_back(Zero); + } return true; } } - if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize && Opcode == ISD::MUL) { // The input values are both sign-extended. if (HasSMUL_LOHI) { // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, - RL); - Hi = SDValue(Lo.getNode(), 1); + SDValue Mul = DAG.getNode(ISD::SMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LL, RL); + Result.push_back(Mul); + Result.push_back(SDValue(Mul.getNode(), 1)); return true; - } - if (HasMULHS) { + } else if (HasMULHS) { // We can emit a mulhs+mul. 
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); + Result.push_back(DAG.getNode(ISD::MUL, dl, HalfVT, LL, RL)); + Result.push_back(DAG.getNode(ISD::MULHS, dl, HalfVT, LL, RL)); return true; } } if (!LH.getNode() && !RH.getNode() && isOperationLegalOrCustom(ISD::SRL, VT) && - isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + isOperationLegalOrCustom(ISD::TRUNCATE, HalfVT)) { auto &DL = DAG.getDataLayout(); - unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); + unsigned ShiftAmt = VT.getSizeInBits() - HalfVT.getSizeInBits(); SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); - LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); - LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); - RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); - RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, RH); } if (!LH.getNode()) return false; - if (HasUMUL_LOHI) { - // Lo,Hi = umul LHS, RHS. 
- SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); - Lo = UMulLOHI; - Hi = UMulLOHI.getValue(1); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); - return true; - } - if (HasMULHU) { - Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); - Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); - RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); - LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); - Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + if (HasUMUL_LOHI || HasMULHU) { + SDValue Next; + if (HasUMUL_LOHI) { + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LL, RL); + Result.push_back(UMulLOHI); + Next = UMulLOHI.getValue(1); + } else { + Result.push_back(DAG.getNode(ISD::MUL, dl, HalfVT, LL, RL)); + Next = DAG.getNode(ISD::MULHU, dl, HalfVT, LL, RL); + } + + if (Opcode == ISD::MUL) { + RH = DAG.getNode(ISD::MUL, dl, HalfVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HalfVT, LH, RL); + Next = DAG.getNode(ISD::ADD, dl, HalfVT, Next, RH); + Next = DAG.getNode(ISD::ADD, dl, HalfVT, Next, LH); + Result.push_back(Next); + return true; + } + + SDValue Lo, Hi; + if (HasUMUL_LOHI) { + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LL, RH); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + } else { + Lo = DAG.getNode(ISD::MUL, dl, HalfVT, LL, RH); + Hi = DAG.getNode(ISD::MULHU, dl, HalfVT, LL, RH); + } + + SDVTList VTList = DAG.getVTList(HalfVT, MVT::Glue); + SDValue SumLo, SumHi; + SumHi = Hi; + SumLo = DAG.getNode(ISD::ADDC, dl, VTList, Next, Lo); + + if (HasUMUL_LOHI) { + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LH, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + } else { + Lo = DAG.getNode(ISD::MUL, dl, HalfVT, LH, RL); 
+ Hi = DAG.getNode(ISD::MULHU, dl, HalfVT, LH, RL); + } + + SumHi = DAG.getNode(ISD::ADDE, dl, VTList, SumHi, Hi, SumLo.getValue(1)); + SumLo = DAG.getNode(ISD::ADDC, dl, VTList, SumLo, Lo); + Result.push_back(SumLo); + + SDValue Carry = SumHi.getValue(1); + + if (HasUMUL_LOHI) { + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HalfVT, HalfVT), LH, RH); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + } else { + Lo = DAG.getNode(ISD::MUL, dl, HalfVT, LH, RH); + Hi = DAG.getNode(ISD::MULHU, dl, HalfVT, LH, RH); + } + + SDValue Zero = DAG.getConstant(0, dl, HalfVT); + SumLo = DAG.getNode(ISD::ADDE, dl, VTList, SumHi, Lo, SumLo.getValue(1)); + SumHi = DAG.getNode(ISD::ADDE, dl, VTList, Hi, Zero, Carry); + SumHi = + DAG.getNode(ISD::ADDE, dl, VTList, SumHi, Zero, SumLo.getValue(1)); + Result.push_back(SumLo); + Result.push_back(SumHi); return true; } } return false; } +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, SDValue LL, SDValue LH, + SDValue RL, SDValue RH) const { + SmallVector<SDValue, 2> Result; + bool Ok = + expandUMUL_LOHI(N->getOpcode(), N->getValueType(0), N, N->getOperand(0), + N->getOperand(1), Result, HiLoVT, DAG, LL, LH, RL, RH); + if (Result.size() >= 2) { + Lo = Result[0]; + Hi = Result[1]; + } + return Ok; +} + bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getOperand(0).getValueType(); Index: test/CodeGen/AMDGPU/udiv.ll =================================================================== --- test/CodeGen/AMDGPU/udiv.ll +++ test/CodeGen/AMDGPU/udiv.ll @@ -145,3 +145,24 @@ store i32 %result.ext, i32 addrspace(1)* %out ret void } + +; FUNC-LABEL: {{^}}udiv_i32_const: +; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925 +; SI-NOT: v_rcp +define void @udiv_i32_const(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { + %num = load i32, i32 addrspace(1)* %in + %result = udiv i32 %num, 7 + store i32 %result, i32 addrspace(1)* %out + ret 
void +} + +; FUNC-LABEL: {{^}}udiv_i64_const: +; SI-DAG: s_mov_b32 [[MAGIC_HI:s[0-9]+]], 0x24924924 +; SI-DAG: s_mov_b32 [[MAGIC_LO:s[0-9]+]], 0x92492493 +; SI-NOT: v_rcp +define void @udiv_i64_const(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { + %num = load i64, i64 addrspace(1)* %in + %result = udiv i64 %num, 7 + store i64 %result, i64 addrspace(1)* %out + ret void +} Index: test/CodeGen/SPARC/rem.ll =================================================================== --- test/CodeGen/SPARC/rem.ll +++ test/CodeGen/SPARC/rem.ll @@ -24,12 +24,42 @@ ; PR18150 ; CHECK-LABEL: test3 -; CHECK: sethi 2545, [[R0:%[gilo][0-7]]] -; CHECK: or [[R0]], 379, [[R1:%[gilo][0-7]]] -; CHECK: mulx %o0, [[R1]], [[R2:%[gilo][0-7]]] -; CHECK: udivx [[R2]], 1021, [[R3:%[gilo][0-7]]] -; CHECK: mulx [[R3]], 1021, [[R4:%[gilo][0-7]]] -; CHECK: sub [[R2]], [[R4]], %o0 +; CHECK: sethi 2545, %o1 +; CHECK-NEXT: or %o1, 379, %o1 +; CHECK-NEXT: mulx %o0, %o1, %o0 +; CHECK-NEXT: sethi 12324, %o1 +; CHECK-NEXT: or %o1, 108, %o1 +; CHECK-NEXT: smul %o0, %o1, %o2 +; CHECK-NEXT: srl %o0, 0, %o3 +; CHECK-NEXT: sethi 1331003, %o4 +; CHECK-NEXT: or %o4, 435, %o4 +; CHECK-NEXT: mulx %o3, %o4, %o5 +; CHECK-NEXT: srlx %o5, 32, %o5 +; CHECK-NEXT: srlx %o0, 32, %g2 +; CHECK-NEXT: mulx %g2, %o4, %g3 +; CHECK-NEXT: srlx %g3, 32, %g3 +; CHECK-NEXT: mulx %o3, %o1, %o3 +; CHECK-NEXT: srlx %o3, 32, %o3 +; CHECK-NEXT: mulx %g2, %o1, %g4 +; CHECK-NEXT: srlx %g4, 32, %g4 +; CHECK-NEXT: addcc %o5, %o2, %o2 +; CHECK-NEXT: addxcc %o3, %g3, %o3 +; CHECK-NEXT: addxcc %g4, 0, %o5 +; CHECK-NEXT: smul %g2, %o4, %o4 +; CHECK-NEXT: smul %g2, %o1, %o1 +; CHECK-NEXT: addcc %o2, %o4, %o2 +; CHECK-NEXT: addxcc %o3, %o1, %o1 +; CHECK-NEXT: addxcc %o5, 0, %o2 +; CHECK-NEXT: srl %o1, 0, %o1 +; CHECK-NEXT: sllx %o2, 32, %o2 +; CHECK-NEXT: or %o1, %o2, %o1 +; CHECK-NEXT: sub %o0, %o1, %o2 +; CHECK-NEXT: srlx %o2, 1, %o2 +; CHECK-NEXT: add %o2, %o1, %o1 +; CHECK-NEXT: srlx %o1, 9, %o1 +; CHECK-NEXT: mulx %o1, 1021, %o1 +; CHECK-NEXT: 
retl +; CHECK-NEXT: sub %o0, %o1, %o0 define i64 @test3(i64 %b) { entry: