diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -57,42 +57,38 @@
 HANDLE_LIBCALL(SDIV_I32, "__divsi3")
 HANDLE_LIBCALL(SDIV_I64, "__divdi3")
 HANDLE_LIBCALL(SDIV_I128, "__divti3")
-HANDLE_LIBCALL(SDIV_IEXT, "__divei4")
 HANDLE_LIBCALL(UDIV_I8, "__udivqi3")
 HANDLE_LIBCALL(UDIV_I16, "__udivhi3")
 HANDLE_LIBCALL(UDIV_I32, "__udivsi3")
 HANDLE_LIBCALL(UDIV_I64, "__udivdi3")
 HANDLE_LIBCALL(UDIV_I128, "__udivti3")
-HANDLE_LIBCALL(UDIV_IEXT, "__udivei4")
 HANDLE_LIBCALL(SREM_I8, "__modqi3")
 HANDLE_LIBCALL(SREM_I16, "__modhi3")
 HANDLE_LIBCALL(SREM_I32, "__modsi3")
 HANDLE_LIBCALL(SREM_I64, "__moddi3")
 HANDLE_LIBCALL(SREM_I128, "__modti3")
-HANDLE_LIBCALL(SREM_IEXT, "__modei4")
 HANDLE_LIBCALL(UREM_I8, "__umodqi3")
 HANDLE_LIBCALL(UREM_I16, "__umodhi3")
 HANDLE_LIBCALL(UREM_I32, "__umodsi3")
 HANDLE_LIBCALL(UREM_I64, "__umoddi3")
 HANDLE_LIBCALL(UREM_I128, "__umodti3")
-HANDLE_LIBCALL(UREM_IEXT, "__umodei4")
 HANDLE_LIBCALL(SDIVREM_I8, nullptr)
 HANDLE_LIBCALL(SDIVREM_I16, nullptr)
 HANDLE_LIBCALL(SDIVREM_I32, nullptr)
 HANDLE_LIBCALL(SDIVREM_I64, nullptr)
 HANDLE_LIBCALL(SDIVREM_I128, nullptr)
-HANDLE_LIBCALL(SDIVREM_IEXT, nullptr)
+HANDLE_LIBCALL(SDIVREM_IEXT, "__divmodei5")
 HANDLE_LIBCALL(UDIVREM_I8, nullptr)
 HANDLE_LIBCALL(UDIVREM_I16, nullptr)
 HANDLE_LIBCALL(UDIVREM_I32, nullptr)
 HANDLE_LIBCALL(UDIVREM_I64, nullptr)
 HANDLE_LIBCALL(UDIVREM_I128, nullptr)
-HANDLE_LIBCALL(UDIVREM_IEXT, nullptr)
+HANDLE_LIBCALL(UDIVREM_IEXT, "__udivmodei5")
 HANDLE_LIBCALL(NEG_I32, "__negsi2")
 HANDLE_LIBCALL(NEG_I64, "__negdi2")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -143,8 +143,7 @@
                        SmallVectorImpl<SDValue> &Results);
   SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
                            RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
-                           RTLIB::Libcall Call_IEXT);
+                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
   void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
                           RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
                           RTLIB::Libcall Call_F128,
@@ -2103,17 +2102,16 @@
   ExpandFPLibCall(Node, LC, Results);
 }
 
-SDValue SelectionDAGLegalize::ExpandIntLibCall(
-    SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
-    RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
-    RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool isSigned,
+                                               RTLIB::Libcall Call_I8,
+                                               RTLIB::Libcall Call_I16,
+                                               RTLIB::Libcall Call_I32,
+                                               RTLIB::Libcall Call_I64,
+                                               RTLIB::Libcall Call_I128) {
   RTLIB::Libcall LC;
   switch (Node->getSimpleValueType(0).SimpleTy) {
-  default:
-    LC = Call_IEXT;
-    break;
-
+  default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::i8:   LC = Call_I8; break;
   case MVT::i16:  LC = Call_I16; break;
   case MVT::i32:  LC = Call_I32; break;
@@ -4323,24 +4321,24 @@
                     RTLIB::SUB_PPCF128, Results);
     break;
   case ISD::SREM:
-    Results.push_back(ExpandIntLibCall(
-        Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
-        RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
+    Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SREM_I8,
+                                       RTLIB::SREM_I16, RTLIB::SREM_I32,
+                                       RTLIB::SREM_I64, RTLIB::SREM_I128));
     break;
   case ISD::UREM:
-    Results.push_back(ExpandIntLibCall(
-        Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
-        RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
+    Results.push_back(ExpandIntLibCall(Node, false, RTLIB::UREM_I8,
+                                       RTLIB::UREM_I16, RTLIB::UREM_I32,
+                                       RTLIB::UREM_I64, RTLIB::UREM_I128));
     break;
   case ISD::SDIV:
-    Results.push_back(ExpandIntLibCall(
-        Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
-        RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
+    Results.push_back(ExpandIntLibCall(Node, true, RTLIB::SDIV_I8,
+                                       RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+                                       RTLIB::SDIV_I64, RTLIB::SDIV_I128));
     break;
   case ISD::UDIV:
-    Results.push_back(ExpandIntLibCall(
-        Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
-        RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
+    Results.push_back(ExpandIntLibCall(Node, false, RTLIB::UDIV_I8,
+                                       RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+                                       RTLIB::UDIV_I64, RTLIB::UDIV_I128));
     break;
   case ISD::SDIVREM:
   case ISD::UDIVREM:
@@ -4348,9 +4346,9 @@
     ExpandDivRemLibCall(Node, Results);
     break;
   case ISD::MUL:
-    Results.push_back(ExpandIntLibCall(
-        Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
-        RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
+    Results.push_back(ExpandIntLibCall(Node, false, RTLIB::MUL_I8,
+                                       RTLIB::MUL_I16, RTLIB::MUL_I32,
+                                       RTLIB::MUL_I64, RTLIB::MUL_I128));
     break;
   case ISD::CTLZ_ZERO_UNDEF:
     switch (Node->getSimpleValueType(0).SimpleTy) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3914,55 +3914,100 @@
   ReplaceValueWith(SDValue(Node, 1), Ovf);
 }
 
-// Emit a call to __udivei4 and friends which require
-// the arguments be based on the stack
-// and extra argument that contains the number of bits of the operands.
+// Emit a call to __udivmodei5/__divmodei5, which require the arguments to be
+// passed on the stack, plus an extra argument that contains the number of
+// words in the operands.
 // Returns the result of the call operation.
-static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
-                                      const RTLIB::Libcall &LC,
-                                      SelectionDAG &DAG, SDNode *N,
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI, bool IsSigned,
+                                      bool IsDiv, SelectionDAG &DAG, SDNode *N,
                                       const SDLoc &DL, const EVT &VT) {
   SDValue InChain = DAG.getEntryNode();
 
+  unsigned WordSizeInBits = DAG.getLibInfo().getIntSize();
+
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
 
-  // The signature of __udivei4 is
-  // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
-  //                unsigned int bits)
+  // The signature of __udivmodei5 and __divmodei5 is:
+  /// Computes the unsigned division of a / b for two large integers
+  /// composed of n significant words.
+  /// Writes the quotient to quo and the remainder to rem.
+  ///
+  /// \param quo The quotient represented by n words. Must be non-null.
+  /// \param rem The remainder represented by n words. Must be non-null.
+  /// \param a The dividend represented by n + 1 words. Must be non-null.
+  /// \param b The divisor represented by n words. Must be non-null.
+  ///
+  /// \note The word order is in host endianness.
+  /// \note Might modify a and b.
+  /// \note The storage of 'a' needs to hold n + 1 elements because some
+  ///       implementations need extra scratch space in the most significant
+  ///       word. The value of that word is ignored.
+  ///
+  /// COMPILER_RT_ABI void __udivmodei5(su_int *quo, su_int *rem, su_int *a,
+  ///                                   su_int *b, unsigned int n);
+
   EVT ArgVT = N->op_begin()->getValueType();
   assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
          "Unexpected argument type for lowering");
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
 
-  SDValue Output = DAG.CreateStackTemporary(ArgVT);
-  Entry.Node = Output;
+  // quo
+  SDValue Quotient = DAG.CreateStackTemporary(ArgVT);
+  Entry.Node = Quotient;
   Entry.Ty = ArgTy->getPointerTo();
   Entry.IsSExt = false;
   Entry.IsZExt = false;
   Args.push_back(Entry);
 
-  for (const llvm::SDUse &Op : N->ops()) {
-    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
-    InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
-    Entry.Node = StackPtr;
-    Entry.Ty = ArgTy->getPointerTo();
-    Entry.IsSExt = false;
-    Entry.IsZExt = false;
-    Args.push_back(Entry);
-  }
+  // rem
+  SDValue Remainder = DAG.CreateStackTemporary(ArgVT);
+  Entry.Node = Remainder;
+  Entry.Ty = ArgTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
 
+  // a; its storage must hold one extra scratch word
+  EVT ArgWithExtraWordVT = EVT::getIntegerVT(
+      *DAG.getContext(), WordSizeInBits + ArgVT.getSizeInBits());
+  SDValue StackPtr = DAG.CreateStackTemporary(ArgWithExtraWordVT);
+  InChain = DAG.getStore(
+      InChain, DL, DAG.getZExtOrTrunc(*N->op_begin(), DL, ArgWithExtraWordVT),
+      StackPtr, MachinePointerInfo());
+  Entry.Node = StackPtr;
+  Entry.Ty =
+      ArgWithExtraWordVT.getTypeForEVT(*DAG.getContext())->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  // b
+  StackPtr = DAG.CreateStackTemporary(ArgVT);
+  InChain = DAG.getStore(InChain, DL, *(N->op_begin() + 1), StackPtr,
+                         MachinePointerInfo());
+  Entry.Node = StackPtr;
+  Entry.Ty = ArgTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  // n: the number of words per operand
   int Bits = N->getOperand(0)
                  .getValueType()
                  .getTypeForEVT(*DAG.getContext())
                  ->getIntegerBitWidth();
-  Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
-  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+  assert(Bits % WordSizeInBits == 0 &&
+         "Operand bit width is not a multiple of the word size");
+  unsigned Words = Bits / WordSizeInBits;
+  Entry.Node = DAG.getConstant(
+      Words, DL, MVT::getIntegerVT(DAG.getLibInfo().getIntSize()));
+  Entry.Ty = Type::getIntNTy(*DAG.getContext(), DAG.getLibInfo().getIntSize());
  Entry.IsSExt = false;
   Entry.IsZExt = true;
   Args.push_back(Entry);
 
+  RTLIB::Libcall LC = IsSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
+
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                          TLI.getPointerTy(DAG.getDataLayout()));
@@ -3975,6 +4020,7 @@
 
   SDValue Chain = TLI.LowerCallTo(CLI).second;
 
+  SDValue Output = IsDiv ? Quotient : Remainder;
   return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
 }
@@ -4001,8 +4047,8 @@
     LC = RTLIB::SDIV_I128;
   else {
-    SDValue Result =
-        ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
+    SDValue Result = ExpandExtIntRes_DIVREM(TLI, /*IsSigned=*/true,
+                                            /*IsDiv=*/true, DAG, N, dl, VT);
     SplitInteger(Result, Lo, Hi);
     return;
   }
@@ -4200,8 +4246,8 @@
     LC = RTLIB::SREM_I128;
   else {
-    SDValue Result =
-        ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
+    SDValue Result = ExpandExtIntRes_DIVREM(TLI, /*IsSigned=*/true,
+                                            /*IsDiv=*/false, DAG, N, dl, VT);
     SplitInteger(Result, Lo, Hi);
     return;
   }
@@ -4383,8 +4429,8 @@
     LC = RTLIB::UDIV_I128;
   else {
-    SDValue Result =
-        ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
+    SDValue Result = ExpandExtIntRes_DIVREM(TLI, /*IsSigned=*/false,
+                                            /*IsDiv=*/true, DAG, N, dl, VT);
     SplitInteger(Result, Lo, Hi);
     return;
   }
@@ -4418,8 +4464,8 @@
     LC = RTLIB::UREM_I128;
   else {
-    SDValue Result =
-        ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
+    SDValue Result = ExpandExtIntRes_DIVREM(TLI, /*IsSigned=*/false,
+                                            /*IsDiv=*/false, DAG, N, dl, VT);
     SplitInteger(Result, Lo, Hi);
     return;
   }
diff --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll
--- a/llvm/test/CodeGen/AArch64/udivmodei5.ll
+++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll
@@ -5,61 +5,66 @@
 define void @udiv129(i129* %ptr, i129* %out) nounwind {
 ; CHECK-LABEL: udiv129:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sub sp, sp, #160
 ; CHECK-NEXT:    ldp x10, x11, [x0]
-; CHECK-NEXT:    stp x30, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x19, [sp, #144] // 16-byte Folded Spill
 ; CHECK-NEXT:    mov x19, x1
 ; CHECK-NEXT:    mov w8, #3
 ; CHECK-NEXT:    ldrb w9, [x0, #16]
-; CHECK-NEXT:    add x0, sp, #64
-; CHECK-NEXT:    add x1, sp, #32
-; CHECK-NEXT:    mov x2, sp
-; CHECK-NEXT:    mov w3, #256
+; CHECK-NEXT:    add x0, sp, #112
+; CHECK-NEXT:    add x1, sp, #80
+; CHECK-NEXT:    add x2, sp, #32
+; CHECK-NEXT:    mov x3, sp
+; CHECK-NEXT:    mov w4, #8
 ; CHECK-NEXT:    stp x9, xzr, [sp, #48]
 ; CHECK-NEXT:    stp xzr, xzr, [sp, #8]
 ; CHECK-NEXT:    stp xzr, x10, [sp, #24]
+; CHECK-NEXT:    str wzr, [sp, #64]
 ; CHECK-NEXT:    str x11, [sp, #40]
 ; CHECK-NEXT:    str x8, [sp]
-; CHECK-NEXT:    bl __udivei4
-; CHECK-NEXT:    ldr w8, [sp, #80]
-; CHECK-NEXT:    ldp x9, x10, [sp, #64]
+; CHECK-NEXT:    bl __udivmodei5
+; CHECK-NEXT:    ldr w8, [sp, #128]
+; CHECK-NEXT:    ldp x9, x10, [sp, #112]
 ; CHECK-NEXT:    and w8, w8, #0x1
 ; CHECK-NEXT:    stp x9, x10, [x19]
 ; CHECK-NEXT:    strb w8, [x19, #16]
-; CHECK-NEXT:    ldp x30, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    ldp x30, x19, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #160
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: udiv129:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    sub sp, sp, #160
 ; CHECK-BE-NEXT:    ldp x11, x10, [x0]
-; CHECK-BE-NEXT:    mov w8, #3
-; CHECK-BE-NEXT:    stp x30, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-BE-NEXT:    ldrb w9, [x0, #16]
+; CHECK-BE-NEXT:    stp x30, x19, [sp, #144] // 16-byte Folded Spill
 ; CHECK-BE-NEXT:    mov x19, x1
-; CHECK-BE-NEXT:    add x0, sp, #64
-; CHECK-BE-NEXT:    add x1, sp, #32
-; CHECK-BE-NEXT:    stp x8, xzr, [sp, #24]
-; CHECK-BE-NEXT:    mov x2, sp
-; CHECK-BE-NEXT:    extr x8, x11, x10, #56
-; CHECK-BE-NEXT:    lsr x11, x11, #56
+; CHECK-BE-NEXT:    ldrb w9, [x0, #16]
+; CHECK-BE-NEXT:    mov w8, #3
+; CHECK-BE-NEXT:    add x0, sp, #112
+; CHECK-BE-NEXT:    add x1, sp, #80
+; CHECK-BE-NEXT:    add x2, sp, #32
+; CHECK-BE-NEXT:    mov x3, sp
 ; CHECK-BE-NEXT:    bfi x9, x10, #8, #56
-; CHECK-BE-NEXT:    mov w3, #256
-; CHECK-BE-NEXT:    stp xzr, xzr, [sp, #8]
-; CHECK-BE-NEXT:    str xzr, [sp]
-; CHECK-BE-NEXT:    stp x11, x8, [sp, #40]
-; CHECK-BE-NEXT:    str x9, [sp, #56]
-; CHECK-BE-NEXT:    bl __udivei4
-; CHECK-BE-NEXT:    ldp x9, x8, [sp, #72]
-; CHECK-BE-NEXT:    ldr x10, [sp, #88]
+; CHECK-BE-NEXT:    extr x10, x11, x10, #56
+; CHECK-BE-NEXT:    lsr x11, x11, #56
+; CHECK-BE-NEXT:    mov w4, #8
+; CHECK-BE-NEXT:    stp xzr, xzr, [sp, #32]
+; CHECK-BE-NEXT:    extr x11, x11, x10, #32
+; CHECK-BE-NEXT:    extr x10, x10, x9, #32
+; CHECK-BE-NEXT:    stp xzr, xzr, [sp]
+; CHECK-BE-NEXT:    stp xzr, x8, [sp, #16]
+; CHECK-BE-NEXT:    stp x11, x10, [sp, #48]
+; CHECK-BE-NEXT:    str w9, [sp, #64]
+; CHECK-BE-NEXT:    bl __udivmodei5
+; CHECK-BE-NEXT:    ldp x9, x8, [sp, #120]
+; CHECK-BE-NEXT:    ldr x10, [sp, #136]
 ; CHECK-BE-NEXT:    extr x9, x9, x8, #8
 ; CHECK-BE-NEXT:    extr x8, x8, x10, #8
 ; CHECK-BE-NEXT:    strb w10, [x19, #16]
 ; CHECK-BE-NEXT:    and x9, x9, #0x1ffffffffffffff
 ; CHECK-BE-NEXT:    stp x9, x8, [x19]
-; CHECK-BE-NEXT:    ldp x30, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    ldp x30, x19, [sp, #144] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    add sp, sp, #160
 ; CHECK-BE-NEXT:    ret
   %a = load i129, i129* %ptr
   %res = udiv i129 %a, 3
@@ -70,46 +75,52 @@
 define i129 @urem129(i129 %a, i129 %b) nounwind {
 ; CHECK-LABEL: urem129:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sub sp, sp, #160
 ; CHECK-NEXT:    stp x0, x1, [sp, #32]
 ; CHECK-NEXT:    and x8, x2, #0x1
-; CHECK-NEXT:    and x9, x6, #0x1
-; CHECK-NEXT:    add x0, sp, #64
-; CHECK-NEXT:    add x1, sp, #32
-; CHECK-NEXT:    mov x2, sp
-; CHECK-NEXT:    mov w3, #256
-; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    and x9, x6, #0x1
+; CHECK-NEXT:    add x0, sp, #112
+; CHECK-NEXT:    add x1, sp, #80
+; CHECK-NEXT:    add x2, sp, #32
+; CHECK-NEXT:    mov x3, sp
+; CHECK-NEXT:    mov w4, #8
+; CHECK-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x8, xzr, [sp, #48]
+; CHECK-NEXT:    str wzr, [sp, #64]
 ; CHECK-NEXT:    stp x9, xzr, [sp, #16]
-; CHECK-NEXT:    bl __umodei4
-; CHECK-NEXT:    ldp x1, x8, [sp, #72]
-; CHECK-NEXT:    ldr x0, [sp, #64]
-; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    bl __udivmodei5
+; CHECK-NEXT:    ldp x1, x8, [sp, #88]
+; CHECK-NEXT:    ldr x0, [sp, #80]
+; CHECK-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
 ; CHECK-NEXT:    and x2, x8, #0x1
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    add sp, sp, #160
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: urem129:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    sub sp, sp, #112
-; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
-; CHECK-BE-NEXT:    and x8, x0, #0x1
-; CHECK-BE-NEXT:    and x9, x4, #0x1
-; CHECK-BE-NEXT:    add x0, sp, #64
-; CHECK-BE-NEXT:    add x1, sp, #32
-; CHECK-BE-NEXT:    mov x2, sp
-; CHECK-BE-NEXT:    mov w3, #256
-; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
-; CHECK-BE-NEXT:    stp x6, xzr, [sp, #24]
-; CHECK-BE-NEXT:    stp x9, x5, [sp, #8]
-; CHECK-BE-NEXT:    str xzr, [sp]
-; CHECK-BE-NEXT:    str x8, [sp, #40]
-; CHECK-BE-NEXT:    bl __umodei4
-; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
-; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    sub sp, sp, #160
+; CHECK-BE-NEXT:    and x9, x0, #0x1
+; CHECK-BE-NEXT:    extr x11, x1, x2, #32
+; CHECK-BE-NEXT:    and x10, x4, #0x1
+; CHECK-BE-NEXT:    str w2, [sp, #64]
+; CHECK-BE-NEXT:    extr x8, x9, x1, #32
+; CHECK-BE-NEXT:    add x0, sp, #112
+; CHECK-BE-NEXT:    add x1, sp, #80
+; CHECK-BE-NEXT:    add x2, sp, #32
+; CHECK-BE-NEXT:    mov x3, sp
+; CHECK-BE-NEXT:    mov w4, #8
+; CHECK-BE-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    stp xzr, xzr, [sp, #32]
+; CHECK-BE-NEXT:    stp x5, x6, [sp, #16]
+; CHECK-BE-NEXT:    stp xzr, x10, [sp]
+; CHECK-BE-NEXT:    stp x8, x11, [sp, #48]
+; CHECK-BE-NEXT:    bl __udivmodei5
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #88]
+; CHECK-BE-NEXT:    ldr x2, [sp, #104]
+; CHECK-BE-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
 ; CHECK-BE-NEXT:    and x0, x8, #0x1
-; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    add sp, sp, #160
 ; CHECK-BE-NEXT:    ret
   %res = urem i129 %a, %b
   ret i129 %res
@@ -118,45 +129,53 @@
 define i129 @sdiv129(i129 %a, i129 %b) nounwind {
 ; CHECK-LABEL: sdiv129:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sub sp, sp, #160
 ; CHECK-NEXT:    sbfx x8, x2, #0, #1
 ; CHECK-NEXT:    stp x0, x1, [sp, #32]
-; CHECK-NEXT:    sbfx x9, x6, #0, #1
-; CHECK-NEXT:    add x0, sp, #64
-; CHECK-NEXT:    add x1, sp, #32
-; CHECK-NEXT:    mov x2, sp
-; CHECK-NEXT:    mov w3, #256
-; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    sbfx x9, x6, #0, #1
+; CHECK-NEXT:    add x0, sp, #112
+; CHECK-NEXT:    add x1, sp, #80
+; CHECK-NEXT:    add x2, sp, #32
+; CHECK-NEXT:    mov x3, sp
+; CHECK-NEXT:    mov w4, #8
+; CHECK-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x8, x8, [sp, #48]
+; CHECK-NEXT:    str wzr, [sp, #64]
 ; CHECK-NEXT:    stp x9, x9, [sp, #16]
-; CHECK-NEXT:    bl __divei4
-; CHECK-NEXT:    ldp x1, x8, [sp, #72]
-; CHECK-NEXT:    ldr x0, [sp, #64]
-; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    bl __divmodei5
+; CHECK-NEXT:    ldp x1, x8, [sp, #120]
+; CHECK-NEXT:    ldr x0, [sp, #112]
+; CHECK-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
 ; CHECK-NEXT:    and x2, x8, #0x1
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    add sp, sp, #160
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: sdiv129:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    sub sp, sp, #160
 ; CHECK-BE-NEXT:    sbfx x8, x0, #0, #1
-; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
-; CHECK-BE-NEXT:    sbfx x9, x4, #0, #1
-; CHECK-BE-NEXT:    add x0, sp, #64
-; CHECK-BE-NEXT:    add x1, sp, #32
-; CHECK-BE-NEXT:    mov x2, sp
-; CHECK-BE-NEXT:    mov w3, #256
-; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    extr x9, x1, x2, #32
+; CHECK-BE-NEXT:    sbfx x10, x4, #0, #1
+; CHECK-BE-NEXT:    str w2, [sp, #64]
+; CHECK-BE-NEXT:    extr x11, x8, x1, #32
+; CHECK-BE-NEXT:    add x0, sp, #112
+; CHECK-BE-NEXT:    add x1, sp, #80
+; CHECK-BE-NEXT:    add x2, sp, #32
+; CHECK-BE-NEXT:    mov x3, sp
+; CHECK-BE-NEXT:    mov w4, #8
+; CHECK-BE-NEXT:    stp x11, x9, [sp, #48]
+; CHECK-BE-NEXT:    ror x9, x8, #32
+; CHECK-BE-NEXT:    lsr x8, x8, #32
+; CHECK-BE-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stp x5, x6, [sp, #16]
-; CHECK-BE-NEXT:    stp x8, x8, [sp, #32]
-; CHECK-BE-NEXT:    stp x9, x9, [sp]
-; CHECK-BE-NEXT:    bl __divei4
-; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
-; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    stp x10, x10, [sp]
+; CHECK-BE-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-BE-NEXT:    bl __divmodei5
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #120]
+; CHECK-BE-NEXT:    ldp x2, x30, [sp, #136] // 8-byte Folded Reload
 ; CHECK-BE-NEXT:    and x0, x8, #0x1
-; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    add sp, sp, #160
 ; CHECK-BE-NEXT:    ret
   %res = sdiv i129 %a, %b
   ret i129 %res
@@ -165,45 +184,54 @@
 define i129 @srem129(i129 %a, i129 %b) nounwind {
 ; CHECK-LABEL: srem129:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sub sp, sp, #160
 ; CHECK-NEXT:    sbfx x8, x2, #0, #1
 ; CHECK-NEXT:    stp x0, x1, [sp, #32]
-; CHECK-NEXT:    sbfx x9, x6, #0, #1
-; CHECK-NEXT:    add x0, sp, #64
-; CHECK-NEXT:    add x1, sp, #32
-; CHECK-NEXT:    mov x2, sp
-; CHECK-NEXT:    mov w3, #256
-; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    sbfx x9, x6, #0, #1
+; CHECK-NEXT:    add x0, sp, #112
+; CHECK-NEXT:    add x1, sp, #80
+; CHECK-NEXT:    add x2, sp, #32
+; CHECK-NEXT:    mov x3, sp
+; CHECK-NEXT:    mov w4, #8
+; CHECK-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
 ; CHECK-NEXT:    stp x8, x8, [sp, #48]
+; CHECK-NEXT:    str wzr, [sp, #64]
 ; CHECK-NEXT:    stp x9, x9, [sp, #16]
-; CHECK-NEXT:    bl __modei4
-; CHECK-NEXT:    ldp x1, x8, [sp, #72]
-; CHECK-NEXT:    ldr x0, [sp, #64]
-; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    bl __divmodei5
+; CHECK-NEXT:    ldp x1, x8, [sp, #88]
+; CHECK-NEXT:    ldr x0, [sp, #80]
+; CHECK-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
 ; CHECK-NEXT:    and x2, x8, #0x1
-; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    add sp, sp, #160
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: srem129:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    sub sp, sp, #160
 ; CHECK-BE-NEXT:    sbfx x8, x0, #0, #1
-; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
-; CHECK-BE-NEXT:    sbfx x9, x4, #0, #1
-; CHECK-BE-NEXT:    add x0, sp, #64
-; CHECK-BE-NEXT:    add x1, sp, #32
-; CHECK-BE-NEXT:    mov x2, sp
-; CHECK-BE-NEXT:    mov w3, #256
-; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    extr x9, x1, x2, #32
+; CHECK-BE-NEXT:    sbfx x10, x4, #0, #1
+; CHECK-BE-NEXT:    str w2, [sp, #64]
+; CHECK-BE-NEXT:    extr x11, x8, x1, #32
+; CHECK-BE-NEXT:    add x0, sp, #112
+; CHECK-BE-NEXT:    add x1, sp, #80
+; CHECK-BE-NEXT:    add x2, sp, #32
+; CHECK-BE-NEXT:    mov x3, sp
+; CHECK-BE-NEXT:    mov w4, #8
+; CHECK-BE-NEXT:    stp x11, x9, [sp, #48]
+; CHECK-BE-NEXT:    ror x9, x8, #32
+; CHECK-BE-NEXT:    lsr x8, x8, #32
+; CHECK-BE-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
 ; CHECK-BE-NEXT:    stp x5, x6, [sp, #16]
-; CHECK-BE-NEXT:    stp x8, x8, [sp, #32]
-; CHECK-BE-NEXT:    stp x9, x9, [sp]
-; CHECK-BE-NEXT:    bl __modei4
-; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
-; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    stp x10, x10, [sp]
+; CHECK-BE-NEXT:    stp x8, x9, [sp, #32]
+; CHECK-BE-NEXT:    bl __divmodei5
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #88]
+; CHECK-BE-NEXT:    ldr x2, [sp, #104]
+; CHECK-BE-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
 ; CHECK-BE-NEXT:    and x0, x8, #0x1
-; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    add sp, sp, #160
 ; CHECK-BE-NEXT:    ret
   %res = srem i129 %a, %b
   ret i129 %res
@@ -213,63 +241,75 @@
 define i257 @sdiv257(i257 %a, i257 %b) nounwind {
 ; CHECK-LABEL: sdiv257:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #208
-; CHECK-NEXT:    ldp x8, x9, [sp, #208]
+; CHECK-NEXT:    sub sp, sp, #288
+; CHECK-NEXT:    ldp x8, x9, [sp, #288]
 ; CHECK-NEXT:    stp x2, x3, [sp, #80]
-; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    add x2, sp, #64
 ; CHECK-NEXT:    stp x0, x1, [sp, #64]
-; CHECK-NEXT:    add x0, sp, #128
-; CHECK-NEXT:    add x1, sp, #64
-; CHECK-NEXT:    mov w3, #512
-; CHECK-NEXT:    str x30, [sp, #192] // 8-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #208
+; CHECK-NEXT:    add x1, sp, #144
+; CHECK-NEXT:    mov x3, sp
+; CHECK-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
 ; CHECK-NEXT:    stp x8, x9, [sp, #16]
-; CHECK-NEXT:    ldr x9, [sp, #224]
+; CHECK-NEXT:    ldr x9, [sp, #304]
 ; CHECK-NEXT:    sbfx x8, x4, #0, #1
+; CHECK-NEXT:    mov w4, #16
 ; CHECK-NEXT:    stp x6, x7, [sp]
 ; CHECK-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-NEXT:    str wzr, [sp, #128]
 ; CHECK-NEXT:    stp x8, x8, [sp, #112]
 ; CHECK-NEXT:    stp x8, x8, [sp, #96]
 ; CHECK-NEXT:    stp x9, x9, [sp, #48]
 ; CHECK-NEXT:    stp x9, x9, [sp, #32]
-; CHECK-NEXT:    bl __divei4
-; CHECK-NEXT:    ldp x3, x8, [sp, #152]
-; CHECK-NEXT:    ldp x0, x1, [sp, #128]
-; CHECK-NEXT:    ldr x2, [sp, #144]
-; CHECK-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
+; CHECK-NEXT:    bl __divmodei5
+; CHECK-NEXT:    ldp x3, x8, [sp, #232]
+; CHECK-NEXT:    ldp x0, x1, [sp, #208]
+; CHECK-NEXT:    ldp x29, x30, [sp, #272] // 16-byte Folded Reload
 ; CHECK-NEXT:    and x4, x8, #0x1
-; CHECK-NEXT:    add sp, sp, #208
+; CHECK-NEXT:    ldr x2, [sp, #224]
+; CHECK-NEXT:    add sp, sp, #288
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-BE-LABEL: sdiv257:
 ; CHECK-BE:       // %bb.0:
-; CHECK-BE-NEXT:    sub sp, sp, #208
-; CHECK-BE-NEXT:    add x8, sp, #208
-; CHECK-BE-NEXT:    str x30, [sp, #192] // 8-byte Folded Spill
-; CHECK-BE-NEXT:    sbfx x9, x0, #0, #1
-; CHECK-BE-NEXT:    add x0, sp, #128
+; CHECK-BE-NEXT:    sub sp, sp, #288
+; CHECK-BE-NEXT:    add x8, sp, #288
+; CHECK-BE-NEXT:    mov x9, sp
+; CHECK-BE-NEXT:    add x9, x9, #40
+; CHECK-BE-NEXT:    extr x10, x3, x4, #32
+; CHECK-BE-NEXT:    stp x29, x30, [sp, #272] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    extr x18, x1, x2, #32
 ; CHECK-BE-NEXT:    ld1 { v0.2d }, [x8]
-; CHECK-BE-NEXT:    mov x8, sp
-; CHECK-BE-NEXT:    add x8, x8, #40
-; CHECK-BE-NEXT:    st1 { v0.2d }, [x8]
-; CHECK-BE-NEXT:    ldr x8, [sp, #224]
-; CHECK-BE-NEXT:    stp x3, x4, [sp, #112]
-; CHECK-BE-NEXT:    mov w3, #512
-; CHECK-BE-NEXT:    stp x1, x2, [sp, #96]
-; CHECK-BE-NEXT:    add x1, sp, #64
-; CHECK-BE-NEXT:    stp x8, x9, [sp, #56]
+; CHECK-BE-NEXT:    ldr x8, [sp, #304]
+; CHECK-BE-NEXT:    str x8, [sp, #56]
+; CHECK-BE-NEXT:    extr x8, x2, x3, #32
+; CHECK-BE-NEXT:    st1 { v0.2d }, [x9]
+; CHECK-BE-NEXT:    sbfx x9, x0, #0, #1
+; CHECK-BE-NEXT:    str w4, [sp, #128]
+; CHECK-BE-NEXT:    add x0, sp, #208
+; CHECK-BE-NEXT:    stp x8, x10, [sp, #112]
 ; CHECK-BE-NEXT:    sbfx x8, x6, #0, #1
-; CHECK-BE-NEXT:    mov x2, sp
-; CHECK-BE-NEXT:    stp x9, x9, [sp, #80]
-; CHECK-BE-NEXT:    str x9, [sp, #72]
+; CHECK-BE-NEXT:    extr x10, x9, x1, #32
+; CHECK-BE-NEXT:    ror x11, x9, #32
+; CHECK-BE-NEXT:    add x1, sp, #144
+; CHECK-BE-NEXT:    add x2, sp, #64
 ; CHECK-BE-NEXT:    stp x8, x8, [sp, #8]
+; CHECK-BE-NEXT:    mov x3, sp
 ; CHECK-BE-NEXT:    stp x8, x7, [sp, #24]
+; CHECK-BE-NEXT:    mov w4, #16
 ; CHECK-BE-NEXT:    str x8, [sp]
-; CHECK-BE-NEXT:    bl __divei4
-; CHECK-BE-NEXT:    ldp x8, x1, [sp, #152]
-; CHECK-BE-NEXT:    ldp x2, x3, [sp, #168]
-; CHECK-BE-NEXT:    ldp x4, x30, [sp, #184] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    lsr x8, x9, #32
+; CHECK-BE-NEXT:    stp x10, x18, [sp, #96]
+; CHECK-BE-NEXT:    str x11, [sp, #88]
+; CHECK-BE-NEXT:    stp x9, x11, [sp, #72]
+; CHECK-BE-NEXT:    str x8, [sp, #64]
+; CHECK-BE-NEXT:    bl __divmodei5
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #232]
+; CHECK-BE-NEXT:    ldp x2, x3, [sp, #248]
+; CHECK-BE-NEXT:    ldp x29, x30, [sp, #272] // 16-byte Folded Reload
 ; CHECK-BE-NEXT:    and x0, x8, #0x1
-; CHECK-BE-NEXT:    add sp, sp, #208
+; CHECK-BE-NEXT:    ldr x4, [sp, #264]
+; CHECK-BE-NEXT:    add sp, sp, #288
 ; CHECK-BE-NEXT:    ret
   %res = sdiv i257 %a, %b
   ret i257 %res
diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll
--- a/llvm/test/CodeGen/X86/udivmodei5.ll
+++ b/llvm/test/CodeGen/X86/udivmodei5.ll
@@ -11,7 +11,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    subl $144, %esp
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 20(%ebp), %eax
@@ -41,15 +41,18 @@
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __udivei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __udivmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -71,27 +74,29 @@
 ;
 ; X64-LABEL: udiv129:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    subq $136, %rsp
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    andl $1, %edx
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, (%rsp)
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __udivei4@PLT
+; X64-NEXT:    movq %rsp, %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __udivmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    addq $136, %rsp
 ; X64-NEXT:    retq
   %res = udiv i129 %a, %b
   ret i129 %res
@@ -106,7 +111,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    subl $144, %esp
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 20(%ebp), %eax
@@ -136,15 +141,18 @@
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __umodei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __udivmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -166,27 +174,29 @@
 ;
 ; X64-LABEL: urem129:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    subq $136, %rsp
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    andl $1, %edx
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, (%rsp)
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __umodei4@PLT
+; X64-NEXT:    movq %rsp, %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __udivmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    addq $136, %rsp
 ; X64-NEXT:    retq
   %res = urem i129 %a, %b
   ret i129 %res
@@ -201,7 +211,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    subl $144, %esp
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 20(%ebp), %eax
@@ -233,15 +243,18 @@
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __divei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __divmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -263,29 +276,31 @@
 ;
 ; X64-LABEL: sdiv129:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    subq $136, %rsp
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    negq %r9
 ; X64-NEXT:    andl $1, %edx
 ; X64-NEXT:    negq %rdx
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, (%rsp)
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __divei4@PLT
+; X64-NEXT:    movq %rsp, %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __divmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    addq $136, %rsp
 ; X64-NEXT:    retq
   %res = sdiv i129 %a, %b
   ret i129 %res
@@ -300,7 +315,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    subl $144, %esp
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 20(%ebp), %eax
@@ -332,15 +347,18 @@
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __modei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __divmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -362,29 +380,31 @@
 ;
 ; X64-LABEL: srem129:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    subq $136, %rsp
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    negq %r9
 ; X64-NEXT:    andl $1, %edx
 ; X64-NEXT:    negq %rdx
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, (%rsp)
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __modei4@PLT
+; X64-NEXT:    movq %rsp, %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __divmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    addq $136, %rsp
 ; X64-NEXT:    retq
   %res = srem i129 %a, %b
   ret i129 %res
@@ -400,7 +420,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $216, %esp
+; X86-NEXT:    subl $288, %esp # imm = 0x120
 ; X86-NEXT:    movl 40(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 36(%ebp), %eax
@@ -417,25 +437,21 @@
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 12(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 72(%ebp), %eax
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 76(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 64(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 68(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 56(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 60(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 48(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 52(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl 80(%ebp), %eax
 ; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    negl %eax
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
@@ -444,27 +460,34 @@
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT:    movl 80(%ebp), %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    negl %eax
+; X86-NEXT:    movl 76(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 72(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 68(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 64(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 60(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 56(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 52(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 8(%ebp), %esi
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $16
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __divei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __divmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
@@ -504,7 +527,7 @@
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %r14
 ; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $200, %rsp
+; X64-NEXT:    subq $264, %rsp # imm = 0x108
 ; X64-NEXT:    movq %rdi, %rbx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    andl $1, %eax
@@ -518,7 +541,7 @@
 ; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rsi, (%rsp)
 ; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r14, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp)
@@ -531,11 +554,13 @@
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $512, %ecx # imm = 0x200
-; X64-NEXT:    callq __divei4@PLT
+; X64-NEXT:    movq %rsp, %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $16, %r8d
+; X64-NEXT:    callq __divmodei5@PLT
 ; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
@@ -548,7 +573,7 @@
 ; X64-NEXT:    andl $1, %eax
 ; X64-NEXT:    movb %al, 32(%rbx)
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    addq $200, %rsp
+; X64-NEXT:    addq $264, %rsp # imm = 0x108
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    popq %r14
 ; X64-NEXT:    retq
@@ -565,7 +590,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $496, %esp # imm = 0x1F0
+; X86-NEXT:    subl $632, %esp # imm = 0x278
 ; X86-NEXT:    movl 132(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 128(%ebp), %eax
@@ -635,6 +660,7 @@
 ; X86-NEXT:    movl 264(%ebp), %eax
 ; X86-NEXT:    shll $23, %eax
 ; X86-NEXT:    sarl $23, %eax
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 260(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
@@ -702,12 +728,14 @@
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $1024 # imm = 0x400
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $32
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __modei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __divmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
@@ -841,7 +869,7 @@
 ; X64-NEXT:    pushq %r13
 ; X64-NEXT:    pushq %r12
 ; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $408, %rsp # imm = 0x198
+; X64-NEXT:    subq $536, %rsp # imm = 0x218
 ; X64-NEXT:    movq %rdi, %rbx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
@@ -869,8 +897,13 @@
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    shlq $23, %rax
+; X64-NEXT:    sarq $23, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    shlq $23, %rax
+; X64-NEXT:    sarq $23, %rax
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
@@ -899,23 +932,20 @@
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT:    shlq $23, %rax
-; X64-NEXT:    sarq $23, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
-; X64-NEXT:    shlq $23, %rax
-; X64-NEXT:    sarq $23, %rax
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $1024, %ecx # imm = 0x400
-; X64-NEXT:    callq __modei4@PLT
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $32, %r8d
+; X64-NEXT:    callq __divmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, (%rsp) # 8-byte Spill
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
@@ -942,7 +972,7 @@
 ; X64-NEXT:    movq %r14, 32(%rbx)
 ; X64-NEXT:    movq %r11, 24(%rbx)
 ; X64-NEXT:    movq %r10, 16(%rbx)
-; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq (%rsp), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, 8(%rbx)
 ; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
 ; X64-NEXT:    movq %rcx, (%rbx)
@@ -951,7 +981,7 @@
 ; X64-NEXT:    andl $511, %eax # imm = 0x1FF
 ; X64-NEXT:    movw %ax, 124(%rbx)
 ; X64-NEXT:    movq %rbx, %rax
-; X64-NEXT:    addq $408, %rsp # imm = 0x198
+; X64-NEXT:    addq $536, %rsp # imm = 0x218
 ; X64-NEXT:    popq %rbx
 ; X64-NEXT:    popq %r12
 ; X64-NEXT:    popq %r13
@@ -972,7 +1002,7 @@
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    andl $-8, %esp
-; X86-NEXT:    subl $200, %esp
+; X86-NEXT:    subl $280, %esp # imm = 0x118
 ; X86-NEXT:    movl 24(%ebp), %eax
 ; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl 20(%ebp), %eax
@@ -1002,15 +1032,18 @@
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __udivei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __udivmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    negl %eax
@@ -1032,17 +1065,20 @@
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl $17, (%esp)
 ; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
 ; X86-NEXT:    movl %esp, %eax
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
 ; X86-NEXT:    pushl %eax
 ; X86-NEXT:    pushl %ecx
 ; X86-NEXT:    pushl %edx
-; X86-NEXT:    calll __divei4
-; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __divmodei5
+; X86-NEXT:    addl $20, %esp
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
@@ -1064,7 +1100,7 @@
 ;
 ; X64-LABEL: chain129:
 ; X64:       # %bb.0:
-; X64-NEXT:    subq $200, %rsp
+; X64-NEXT:    subq $280, %rsp # imm = 0x118
 ; X64-NEXT:    andl $1, %r9d
 ; X64-NEXT:    andl $1, %edx
 ; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
@@ -1074,12 +1110,14 @@
 ; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __udivei4@PLT
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __udivmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    andl $1, %eax
 ; X64-NEXT:    negq %rax
@@ -1089,6 +1127,7 @@
 ; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; X64-NEXT:    movq $17, {{[0-9]+}}(%rsp)
@@ -1096,15 +1135,210 @@
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
 ; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movl $256, %ecx # imm = 0x100
-; X64-NEXT:    callq __divei4@PLT
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __divmodei5@PLT
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
 ; X64-NEXT:    andl $1, %ecx
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
 ; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    addq $200, %rsp
+; X64-NEXT:    addq $280, %rsp # imm = 0x118
 ; X64-NEXT:    retq
   %res = udiv i129 %a, %b
   %res2 = sdiv i129 %res, 17
   ret i129 %res2
 }
+
+define i129 @divmod(i129 %a, i129 %b) nounwind {
+; TODO: udiv + urem are not folded into a single udivrem call
+; X86-LABEL: divmod:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $296, %esp # imm = 0x128
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl 28(%ebp), %esi
+; X86-NEXT:    andl $1, %esi
+; X86-NEXT:    movl 16(%ebp), %ecx
+; X86-NEXT:    movl 20(%ebp), %edx
+; X86-NEXT:    movl 24(%ebp), %edi
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 36(%ebp), %ecx
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    pushl $8
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    calll __udivmodei5
+; X86-NEXT:    addl $20, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    pushl $8
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    calll __udivmodei5
+; X86-NEXT:    addl $20, %esp
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl (%esp), %esi # 4-byte Reload
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl 8(%ebp), %eax
+; X86-NEXT:    movl %edi, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    movl %edx, 8(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
+; X86-NEXT:    andl $1, %ebx
+; X86-NEXT:    movb %bl, 16(%eax)
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: divmod:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $296, %rsp # imm = 0x128
+; X64-NEXT:    movq %r9, %r15
+; X64-NEXT:    movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq %rdx, %r13
+; X64-NEXT:    movq %rsi, (%rsp) # 8-byte Spill
+; X64-NEXT:    movq %rdi, %rbp
+; X64-NEXT:    andl $1, %r15d
+; X64-NEXT:    andl $1, %r13d
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r13, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __udivmodei5@PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    movq (%rsp), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rbp, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r13, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r15, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movl $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movl $8, %r8d
+; X64-NEXT:    callq __udivmodei5@PLT
+; X64-NEXT:    addq {{[0-9]+}}(%rsp), %r12
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    adcq {{[0-9]+}}(%rsp), %rbx
+; X64-NEXT:    andl $1, %ebx
+; X64-NEXT:    movq %r12, %rax
+; X64-NEXT:    movq %r14, %rdx
+; X64-NEXT:    movq %rbx, %rcx
+; X64-NEXT:    addq $296, %rsp # imm = 0x128
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %quo = udiv i129 %a, %b
+  %rem = urem i129 %a, %b
+  %res = add i129 %quo, %rem
+  ret i129 %res
+}
+
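
Note: for reference, a minimal C sketch of the ABI this patch targets, as seen from the caller's side. This is illustrative only: the su_int typedef and the udiv129 helper are assumptions made here for the example (they are not part of this patch), and it assumes a little-endian host with a 32-bit int, matching the n = 8 word count the i129 tests above expect. The authoritative signature is the compiler-rt declaration quoted in the LegalizeIntegerTypes.cpp comment.

  #include <stdint.h>
  #include <string.h>

  typedef uint32_t su_int; /* 32-bit word type, as in compiler-rt */

  /* Provided by compiler-rt: quo and rem each hold n words; a holds n + 1
     words (the extra most-significant word is scratch and its value is
     ignored); b holds n words. Word order is host endianness, and the
     implementation may clobber a and b. */
  void __udivmodei5(su_int *quo, su_int *rem, su_int *a, su_int *b,
                    unsigned int n);

  /* Hypothetical i129 udiv, mirroring what the lowered code above does:
     both operands are stored zero-extended into 8 words (256 bits), 'a'
     gets one extra scratch word, and n = 8 is passed. The 17-byte buffers
     hold an i129 with the unused high bits kept at zero. */
  static void udiv129(const unsigned char x[17], const unsigned char y[17],
                      unsigned char out[17]) {
    su_int quo[8] = {0}, rem[8] = {0}, a[9] = {0}, b[8] = {0};
    memcpy(a, x, 17); /* low 129 bits; remaining words stay zero */
    memcpy(b, y, 17);
    __udivmodei5(quo, rem, a, b, 8);
    memcpy(out, quo, 17);
  }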