diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -47,6 +47,8 @@
 HANDLE_LIBCALL(MUL_I32, "__mulsi3")
 HANDLE_LIBCALL(MUL_I64, "__muldi3")
 HANDLE_LIBCALL(MUL_I128, "__multi3")
+HANDLE_LIBCALL(MUL_IEXT, nullptr)
+
 HANDLE_LIBCALL(MULO_I32, "__mulosi4")
 HANDLE_LIBCALL(MULO_I64, "__mulodi4")
 HANDLE_LIBCALL(MULO_I128, "__muloti4")
@@ -55,31 +57,43 @@
 HANDLE_LIBCALL(SDIV_I32, "__divsi3")
 HANDLE_LIBCALL(SDIV_I64, "__divdi3")
 HANDLE_LIBCALL(SDIV_I128, "__divti3")
+HANDLE_LIBCALL(SDIV_IEXT, "__divei4")
+
 HANDLE_LIBCALL(UDIV_I8, "__udivqi3")
 HANDLE_LIBCALL(UDIV_I16, "__udivhi3")
 HANDLE_LIBCALL(UDIV_I32, "__udivsi3")
 HANDLE_LIBCALL(UDIV_I64, "__udivdi3")
 HANDLE_LIBCALL(UDIV_I128, "__udivti3")
+HANDLE_LIBCALL(UDIV_IEXT, "__udivei4")
+
 HANDLE_LIBCALL(SREM_I8, "__modqi3")
 HANDLE_LIBCALL(SREM_I16, "__modhi3")
 HANDLE_LIBCALL(SREM_I32, "__modsi3")
 HANDLE_LIBCALL(SREM_I64, "__moddi3")
 HANDLE_LIBCALL(SREM_I128, "__modti3")
+HANDLE_LIBCALL(SREM_IEXT, "__modei4")
+
 HANDLE_LIBCALL(UREM_I8, "__umodqi3")
 HANDLE_LIBCALL(UREM_I16, "__umodhi3")
 HANDLE_LIBCALL(UREM_I32, "__umodsi3")
 HANDLE_LIBCALL(UREM_I64, "__umoddi3")
 HANDLE_LIBCALL(UREM_I128, "__umodti3")
+HANDLE_LIBCALL(UREM_IEXT, "__umodei4")
+
 HANDLE_LIBCALL(SDIVREM_I8, nullptr)
 HANDLE_LIBCALL(SDIVREM_I16, nullptr)
 HANDLE_LIBCALL(SDIVREM_I32, nullptr)
 HANDLE_LIBCALL(SDIVREM_I64, nullptr)
 HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(SDIVREM_IEXT, nullptr)
+
 HANDLE_LIBCALL(UDIVREM_I8, nullptr)
 HANDLE_LIBCALL(UDIVREM_I16, nullptr)
 HANDLE_LIBCALL(UDIVREM_I32, nullptr)
 HANDLE_LIBCALL(UDIVREM_I64, nullptr)
 HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_IEXT, nullptr)
+
 HANDLE_LIBCALL(NEG_I32, "__negsi2")
 HANDLE_LIBCALL(NEG_I64, "__negdi2")
 HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -142,12 +142,10 @@
                        RTLIB::Libcall Call_F128,
                        RTLIB::Libcall Call_PPCF128,
                        SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
-                           RTLIB::Libcall Call_I8,
-                           RTLIB::Libcall Call_I16,
-                           RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64,
-                           RTLIB::Libcall Call_I128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
+                           RTLIB::Libcall Call_IEXT);
   void ExpandArgFPLibCall(SDNode *Node,
                           RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
                           RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2101,15 +2099,17 @@
   ExpandFPLibCall(Node, LC, Results);
 }
 
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
-                                               RTLIB::Libcall Call_I8,
-                                               RTLIB::Libcall Call_I16,
-                                               RTLIB::Libcall Call_I32,
-                                               RTLIB::Libcall Call_I64,
-                                               RTLIB::Libcall Call_I128) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(
+    SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+    RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+    RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
   RTLIB::Libcall LC;
   switch (Node->getSimpleValueType(0).SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+
+  default:
+    LC = Call_IEXT;
+    break;
+
   case MVT::i8:   LC = Call_I8; break;
   case MVT::i16:  LC = Call_I16; break;
   case MVT::i32:  LC = Call_I32; break;
@@ -2144,7 +2144,11 @@
   RTLIB::Libcall LC;
   switch (Node->getSimpleValueType(0).SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+
+  default:
+    LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
+    break;
+
   case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -4315,28 +4319,24 @@
                     RTLIB::SUB_PPCF128, Results);
     break;
   case ISD::SREM:
-    Results.push_back(ExpandIntLibCall(Node, true,
-                                       RTLIB::SREM_I8,
-                                       RTLIB::SREM_I16, RTLIB::SREM_I32,
-                                       RTLIB::SREM_I64, RTLIB::SREM_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
+        RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
     break;
   case ISD::UREM:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::UREM_I8,
-                                       RTLIB::UREM_I16, RTLIB::UREM_I32,
-                                       RTLIB::UREM_I64, RTLIB::UREM_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
+        RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
     break;
   case ISD::SDIV:
-    Results.push_back(ExpandIntLibCall(Node, true,
-                                       RTLIB::SDIV_I8,
-                                       RTLIB::SDIV_I16, RTLIB::SDIV_I32,
-                                       RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+        RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
     break;
   case ISD::UDIV:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::UDIV_I8,
-                                       RTLIB::UDIV_I16, RTLIB::UDIV_I32,
-                                       RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+        RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
     break;
   case ISD::SDIVREM:
   case ISD::UDIVREM:
@@ -4344,10 +4344,9 @@
     ExpandDivRemLibCall(Node, Results);
     break;
   case ISD::MUL:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::MUL_I8,
-                                       RTLIB::MUL_I16, RTLIB::MUL_I32,
-                                       RTLIB::MUL_I64, RTLIB::MUL_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
+        RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
     break;
   case ISD::CTLZ_ZERO_UNDEF:
     switch (Node->getSimpleValueType(0).SimpleTy) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3901,6 +3901,74 @@
   ReplaceValueWith(SDValue(Node, 1), Ovf);
 }
 
+// Emit a call to __udivei4 and friends, which expect the result buffer and
+// the operands to be passed as pointers to stack slots, plus an extra
+// argument giving the bit width of the operands.
+// Returns the result of the call operation.
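+//
+// Illustrative call shape, assuming 256-bit operands stored as arrays of
+// 32-bit words (a sketch, not taken from the compiler-rt sources):
+//   unsigned int Quo[8], A[8], B[8];
+//   __udivei4(Quo, A, B, /*bits=*/256); // Quo = A / B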
+static SDValue passArguments_DIVREM(const TargetLowering &TLI,
+                                    RTLIB::Libcall LC, SelectionDAG &DAG,
+                                    SDNode *N, const SDLoc &DL) {
+
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  // The signature of __udivei4 is
+  // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
+  //                unsigned int bits)
+  EVT ArgVT = N->op_begin()->getValueType();
+  assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
+         "Unexpected argument type for lowering");
+  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+  // The first argument is the stack slot the result is written to.
+  SDValue Output = DAG.CreateStackTemporary(ArgVT);
+  Entry.Node = Output;
+  Entry.Ty = ArgTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  // Store each operand to its own stack slot and pass the slots by pointer.
+  for (const SDUse &Op : N->ops()) {
+    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
+    InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
+    Entry.Node = StackPtr;
+    Entry.Ty = ArgTy->getPointerTo();
+    Entry.IsSExt = false;
+    Entry.IsZExt = false;
+    Args.push_back(Entry);
+  }
+
+  int Bits = ArgVT.getSizeInBits();
+  Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
+  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+  Entry.IsSExt = false;
+  Entry.IsZExt = true;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy(DAG.getDataLayout()));
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(InChain)
+      .setLibCallee(TLI.getLibcallCallingConv(LC),
+                    Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
+      .setDiscardResult();
+
+  SDValue Chain = TLI.LowerCallTo(CLI).second;
+
+  // Read the result back from the output stack slot.
+  return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
+}
+
 void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -3922,6 +3990,14 @@
     LC = RTLIB::SDIV_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::SDIV_I128;
+
+  else {
+    SDValue Result =
+        passArguments_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4113,6 +4189,14 @@
     LC = RTLIB::SREM_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::SREM_I128;
+
+  else {
+    SDValue Result =
+        passArguments_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4288,6 +4372,14 @@
     LC = RTLIB::UDIV_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::UDIV_I128;
+
+  else {
+    SDValue Result =
+        passArguments_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4315,6 +4407,14 @@
     LC = RTLIB::UREM_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::UREM_I128;
+
+  else {
+    SDValue Result =
+        passArguments_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
diff --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/udivmodei5.ll
@@ -0,0 +1,1197 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 + +define i129 @udiv129(i129 %a, i129 %b) { +; X86-LABEL: udiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $104, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esp, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __udivei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 12(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: udiv129: +; X64: # %bb.0: +; X64-NEXT: subq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 112 +; X64-NEXT: andl $1, %r9d +; X64-NEXT: andl $1, %edx +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __udivei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: addq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; 
X64-NEXT: retq + %res = udiv i129 %a, %b + ret i129 %res +} + +define i129 @urem129(i129 %a, i129 %b) { +; X86-LABEL: urem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $104, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esp, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __umodei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 12(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: urem129: +; X64: # %bb.0: +; X64-NEXT: subq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 112 +; X64-NEXT: andl $1, %r9d +; X64-NEXT: andl $1, %edx +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __umodei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: addq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = urem i129 %a, %b + ret i129 %res +} + +define i129 @sdiv129(i129 %a, i129 %b) { +; X86-LABEL: sdiv129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: 
.cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $104, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __divei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 12(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv129: +; X64: # %bb.0: +; X64-NEXT: subq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 112 +; X64-NEXT: andl $1, %r9d +; X64-NEXT: negq %r9 +; X64-NEXT: andl $1, %edx +; X64-NEXT: negq %rdx +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __divei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: addq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = sdiv i129 %a, %b + ret i129 %res +} + +define i129 @srem129(i129 %a, i129 %b) { +; X86-LABEL: srem129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, 
%ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $104, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl %esp, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __modei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 12(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: srem129: +; X64: # %bb.0: +; X64-NEXT: subq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 112 +; X64-NEXT: andl $1, %r9d +; X64-NEXT: negq %r9 +; X64-NEXT: andl $1, %edx +; X64-NEXT: negq %rdx +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __modei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: addq $104, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = srem i129 %a, %b + ret i129 %res +} + +; Some higher sizes +define i257 @sdiv257(i257 %a, i257 %b) { +; X86-LABEL: sdiv257: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: 
pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $216, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 72(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 76(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 64(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 68(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 56(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 60(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 52(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 80(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $512 # imm = 0x200 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __divei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 28(%esi) +; X86-NEXT: movl %ecx, 24(%esi) +; X86-NEXT: movl %edx, 20(%esi) +; X86-NEXT: movl %edi, 16(%esi) +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, 
4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 32(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: sdiv257: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: subq $200, %rsp +; X64-NEXT: .cfi_def_cfa_offset 224 +; X64-NEXT: .cfi_offset %rbx, -24 +; X64-NEXT: .cfi_offset %r14, -16 +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax +; X64-NEXT: andl $1, %r9d +; X64-NEXT: negq %r9 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r11, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r10, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $512, %ecx # imm = 0x200 +; X64-NEXT: callq __divei4@PLT +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq %rdi, 24(%rbx) +; X64-NEXT: movq %rsi, 16(%rbx) +; X64-NEXT: movq %rdx, 8(%rbx) +; X64-NEXT: movq %rcx, (%rbx) +; X64-NEXT: andl $1, %eax +; X64-NEXT: movb %al, 32(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $200, %rsp +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = sdiv i257 %a, %b + ret i257 %res +} + +define i1001 @srem1001(i1001 %a, i1001 %b) { +; X86-LABEL: srem1001: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $496, %esp # imm = 0x1F0 +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 132(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 128(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 124(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 120(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 116(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 112(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 108(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; 
X86-NEXT: movl 104(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 100(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 96(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 92(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 88(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 84(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 80(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 76(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 72(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 68(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 64(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 60(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 56(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 52(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 136(%ebp), %eax +; X86-NEXT: shll $23, %eax +; X86-NEXT: sarl $23, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 264(%ebp), %eax +; X86-NEXT: shll $23, %eax +; X86-NEXT: sarl $23, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 260(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 256(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 252(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 248(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 244(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 240(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 236(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 232(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 228(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 224(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 220(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 216(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 212(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 208(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 204(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 200(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 196(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 192(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 188(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 184(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 
180(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 176(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 172(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 168(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 164(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 160(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 156(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 152(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 148(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 144(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 140(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $1024 # imm = 0x400 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __modei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 120(%esi) +; X86-NEXT: movl %eax, 116(%esi) +; X86-NEXT: movl %ecx, 112(%esi) +; X86-NEXT: movl %edx, 108(%esi) +; X86-NEXT: movl %edi, 104(%esi) +; X86-NEXT: movl (%esp), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 100(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 96(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 92(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 88(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 84(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 80(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 76(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 72(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 68(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 64(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 60(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 56(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 52(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 48(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 44(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 40(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 36(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 32(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 28(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 24(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 20(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 16(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 12(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 8(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, 4(%esi) +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: movl %eax, (%esi) +; X86-NEXT: movl $511, %eax # imm = 0x1FF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %ax, 124(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: srem1001: +; 
X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r15 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %r13 +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %r12 +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 56 +; X64-NEXT: subq $408, %rsp # imm = 0x198 +; X64-NEXT: .cfi_def_cfa_offset 464 +; X64-NEXT: .cfi_offset %rbx, -56 +; X64-NEXT: .cfi_offset %r12, -48 +; X64-NEXT: .cfi_offset %r13, -40 +; X64-NEXT: .cfi_offset %r14, -32 +; X64-NEXT: .cfi_offset %r15, -24 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: shlq $23, %rax +; X64-NEXT: sarq $23, %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: shlq $23, %rax +; X64-NEXT: sarq $23, %rax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $1024, %ecx # imm = 0x400 +; X64-NEXT: callq __modei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 
8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r11 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r14 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r15 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r12 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 +; X64-NEXT: movq %r9, 112(%rbx) +; X64-NEXT: movq %rcx, 104(%rbx) +; X64-NEXT: movq %rbp, 96(%rbx) +; X64-NEXT: movq %rdi, 88(%rbx) +; X64-NEXT: movq %rsi, 80(%rbx) +; X64-NEXT: movq %rdx, 72(%rbx) +; X64-NEXT: movq %r8, 64(%rbx) +; X64-NEXT: movq %r13, 56(%rbx) +; X64-NEXT: movq %r12, 48(%rbx) +; X64-NEXT: movq %r15, 40(%rbx) +; X64-NEXT: movq %r14, 32(%rbx) +; X64-NEXT: movq %r11, 24(%rbx) +; X64-NEXT: movq %r10, 16(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, 8(%rbx) +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rcx, (%rbx) +; X64-NEXT: movl %eax, 120(%rbx) +; X64-NEXT: shrq $32, %rax +; X64-NEXT: andl $511, %eax # imm = 0x1FF +; X64-NEXT: movw %ax, 124(%rbx) +; X64-NEXT: movq %rbx, %rax +; X64-NEXT: addq $408, %rsp # imm = 0x198 +; X64-NEXT: .cfi_def_cfa_offset 56 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: popq %r12 +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: popq %r13 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r15 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = srem i1001 %a, %b + ret i1001 %res +} + +define i129 @chain129(i129 %a, i129 %b) { +; X86-LABEL: chain129: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $200, %esp +; X86-NEXT: .cfi_offset %esi, -20 +; X86-NEXT: .cfi_offset %edi, -16 +; X86-NEXT: .cfi_offset %ebx, -12 +; X86-NEXT: movl 24(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 20(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 48(%ebp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl 8(%ebp), %esi +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), 
%ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __udivei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $1, %eax +; X86-NEXT: negl %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $17, (%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esp, %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %ecx +; X86-NEXT: leal {{[0-9]+}}(%esp), %edx +; X86-NEXT: pushl $256 # imm = 0x100 +; X86-NEXT: pushl %eax +; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx +; X86-NEXT: calll __divei4 +; X86-NEXT: addl $16, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl %ebx, 12(%esi) +; X86-NEXT: movl %edi, 8(%esi) +; X86-NEXT: movl %edx, 4(%esi) +; X86-NEXT: movl %ecx, (%esi) +; X86-NEXT: andl $1, %eax +; X86-NEXT: movb %al, 16(%esi) +; X86-NEXT: movl %esi, %eax +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl $4 +; +; X64-LABEL: chain129: +; X64: # %bb.0: +; X64-NEXT: subq $200, %rsp +; X64-NEXT: .cfi_def_cfa_offset 208 +; X64-NEXT: andl $1, %r9d +; X64-NEXT: andl $1, %edx +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __udivei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: andl $1, %eax +; X64-NEXT: negq %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $17, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movl $256, %ecx # imm = 0x100 +; X64-NEXT: callq __divei4@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: andl $1, %ecx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: addq $200, %rsp +; X64-NEXT: 
.cfi_def_cfa_offset 8 +; X64-NEXT: retq + %res = udiv i129 %a, %b + %res2 = sdiv i129 %res, 17 + ret i129 %res2 +}
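
For reviewers who want to sanity-check the expected runtime behavior, the sketch below is a self-contained C++ model of the __udivei4 contract described in the code comment above: quotient = a / b, with all three operands `bits` wide and stored as little-endian arrays of 32-bit words, matching the stack layout the X86 tests store and reload. It is an illustration only, using restoring shift-and-subtract division; udivei4Sketch and its helpers are names invented here, not the compiler-rt implementation. Note also that an i129 division reaches the expansion as an i256 node, because the type legalizer first rounds the type up to a power of two, which is why the tests pass 256 as the bits argument.

#include <cstdio>
#include <cstring>

typedef unsigned int u32; // Assumes 32-bit unsigned int, as the libcall signature does.

// Bit accessors over a little-endian array of 32-bit words.
static int bitAt(const u32 *X, unsigned I) { return (X[I / 32] >> (I % 32)) & 1; }
static void setBit(u32 *X, unsigned I) { X[I / 32] |= 1u << (I % 32); }

// X = (X << 1) | B over Words words; B is the carried-in low bit.
static void shiftLeftOr(u32 *X, unsigned Words, u32 B) {
  for (unsigned I = 0; I < Words; ++I) {
    u32 Carry = X[I] >> 31;
    X[I] = (X[I] << 1) | B;
    B = Carry;
  }
}

// Compare from the most significant word down.
static int compareWords(const u32 *X, const u32 *Y, unsigned Words) {
  for (unsigned I = Words; I-- > 0;)
    if (X[I] != Y[I])
      return X[I] < Y[I] ? -1 : 1;
  return 0;
}

// X -= Y with borrow propagation.
static void subtractWords(u32 *X, const u32 *Y, unsigned Words) {
  unsigned long long Borrow = 0;
  for (unsigned I = 0; I < Words; ++I) {
    unsigned long long D = (unsigned long long)X[I] - Y[I] - Borrow;
    X[I] = (u32)D;
    Borrow = (D >> 32) & 1; // D wraps high on underflow, so bit 32 is set.
  }
}

// Quo = A / B, all operands Bits wide (Bits <= 1024 here); restoring long
// division. Division by zero is undefined, as for the real libcall.
void udivei4Sketch(u32 *Quo, const u32 *A, const u32 *B, unsigned Bits) {
  unsigned Words = Bits / 32;
  u32 Rem[33] = {0}, BExt[33] = {0}; // One spare word so Rem << 1 cannot overflow.
  std::memcpy(BExt, B, Words * sizeof(u32));
  std::memset(Quo, 0, Words * sizeof(u32));
  for (unsigned I = Bits; I-- > 0;) {
    shiftLeftOr(Rem, Words + 1, bitAt(A, I));      // Rem = Rem*2 + bit I of A.
    if (compareWords(Rem, BExt, Words + 1) >= 0) { // If Rem >= B, subtract and
      subtractWords(Rem, BExt, Words + 1);         // set the quotient bit.
      setBit(Quo, I);
    }
  }
}

int main() {
  // 256-bit check: (2^128 + 5) / 3 == 0x...5555555555555557.
  u32 A[8] = {5}, B[8] = {3}, Quo[8];
  A[4] = 1; // Set bit 128.
  udivei4Sketch(Quo, A, B, 256);
  std::printf("%08x %08x\n", Quo[1], Quo[0]); // Prints: 55555555 55555557
  return 0;
}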