Patch notes (free text ahead of the first "Index:" header; patch tools skip it):
  When the target triple is both 64-bit and Windows (the Win64 flag), each
  libcall helper touched below stores every argument wider than 64 bits to a
  16-byte-aligned stack temporary, threads that store into InChain, and passes
  the slot's address instead of the value (sign/zero-extension flags cleared).
  A 128-bit result is requested as v2i64, and a result whose IR type is a
  vector is bitcast back to RetVT before being returned.
  SelectionDAGLegalize::ExpandLibCall(LC, Node, isSigned) additionally clears
  isTailCall whenever it applies either rewrite.
  test/CodeGen/X86/mod128.ll checks srem i128 on x86_64-linux and x86_64-win32.
  NOTE(review): the template arguments on cast<>, isa<>, static_cast<> and
  std::pair<> were restored from LLVM idiom after being stripped in extraction;
  confirm against the original patch.
  NOTE(review): line breaks and blank context lines inside the hunks appear to
  have been lost as well (the @@ counts exceed the visible lines); restore them
  from the original patch before applying.
Index: lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1849,6 +1849,17 @@ // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + bool isTailCall = true; + DebugLoc dl = Node->getDebugLoc(); + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -1857,31 +1868,46 @@ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + SDValue StackPtr = DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. + InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + isTailCall = false; + } Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); - - // By default, the input chain to this libcall is the entry node of the - // function. 
If the libcall is going to be emitted as a tail call then - // TLI.isUsedByReturnOnly will change it to the right chain if the return - // node which is being folded has a non-entry input chain. - SDValue InChain = DAG.getEntryNode(); - - // isTailCall may be true since the callee does not reference caller stack - // frame. Check if it's in the right position. - SDValue TCChain = InChain; - bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); - if (isTailCall) - InChain = TCChain; + EVT RetVT = Node->getValueType(0); + Type *RetTy; + if (Win64 && RetVT.getSizeInBits() == 128) { + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); + isTailCall = false; // necessary because of the calling convention mismatch between LLVM and native Win64 + } else + RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + if (isTailCall) { + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. + SDValue TCChain = InChain; + isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); + if (isTailCall) + InChain = TCChain; + } TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1889,7 +1915,10 @@ // It's a tailcall, return the chain (which is the DAG root). 
return DAG.getRoot(); - return CallInfo.first; + SDValue ret = CallInfo.first; + if (isa<VectorType>(RetTy)) + ret = DAG.getNode(ISD::BITCAST, dl, RetVT, ret); + return ret; } /// ExpandLibCall - Generate a libcall taking the given operands as arguments @@ -1900,27 +1929,51 @@ TargetLowering::ArgListTy Args; Args.reserve(NumOps); + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + + SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + SDValue StackPtr = DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. + InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + } Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *RetTy; + if (Win64 && RetVT.getSizeInBits() == 128) { + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); + } else { + RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + } TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - return CallInfo.first; + SDValue ret = 
CallInfo.first; + if (isa<VectorType>(RetTy)) + ret = 
DAG.getNode(ISD::BITCAST, dl, RetVT, ret); + return ret; } // ExpandChainLibCall - Expand a node into a call to a libcall. Similar to @@ -1931,6 +1984,10 @@ bool isSigned) { SDValue InChain = Node->getOperand(0); + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + DebugLoc dl = Node->getDebugLoc(); + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { @@ -1940,19 +1997,37 @@ Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + SDValue StackPtr = DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. + InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + } Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + EVT RetVT = Node->getValueType(0); + Type *RetTy; + if (Win64 && RetVT.getSizeInBits() == 128) + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); + else + RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + if (isa<VectorType>(RetTy)) + CallInfo.first = DAG.getNode(ISD::BITCAST, dl, RetVT, CallInfo.first); return CallInfo; } @@ -2056,10 +2131,14 @@ // 
Legalizing the call will automatically add the 
previous call to the // dependence. SDValue InChain = DAG.getEntryNode(); + DebugLoc dl = Node->getDebugLoc(); EVT RetVT = Node->getValueType(0); Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -2068,6 +2147,18 @@ Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + SDValue StackPtr = DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. + InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + } Args.push_back(Entry); } @@ -2082,7 +2173,8 @@ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - DebugLoc dl = Node->getDebugLoc(); + if (Win64 && RetVT.getSizeInBits() == 128) + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -2093,7 +2185,10 @@ // Remainder is loaded back from the stack frame. 
SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo(), false, false, false, 0); - Results.push_back(CallInfo.first); + if (isa<VectorType>(RetTy)) + Results.push_back(DAG.getNode(ISD::BITCAST, dl, RetVT, CallInfo.first)); + else + Results.push_back(CallInfo.first); Results.push_back(Rem); } Index: lib/CodeGen/SelectionDAG/LegalizeTypes.cpp =================================================================== --- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -15,6 +15,7 @@ #include "LegalizeTypes.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" @@ -1069,6 +1070,10 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + DebugLoc dl = Node->getDebugLoc(); + SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1080,12 +1085,30 @@ Entry.Ty = ArgTy; Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + SDValue StackPtr = DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. 
+ InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + } Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + EVT RetVT = Node->getValueType(0); + Type *RetTy; + if (Win64 && RetVT.getSizeInBits() == 128) + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); + else + RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, @@ -1093,6 +1116,8 @@ Callee, Args, DAG, Node->getDebugLoc()); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + if (isa<VectorType>(RetTy)) + CallInfo.first = DAG.getNode(ISD::BITCAST, dl, RetVT, CallInfo.first); return CallInfo; } Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -14,6 +14,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -74,26 +75,48 @@ TargetLowering::ArgListTy Args; Args.reserve(NumOps); + Triple Trip(DAG.getTarget().getTargetTriple()); + bool Win64 = Trip.isArch64Bit() && Trip.isOSWindows(); + SDValue InChain = DAG.getEntryNode(); TargetLowering::ArgListEntry Entry; for (unsigned i = 0; i != NumOps; ++i) { Entry.Node = Ops[i]; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); Entry.isSExt = isSigned; Entry.isZExt = !isSigned; + if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) { + 
SDValue StackPtr = 
DAG.CreateStackTemporary(Entry.Node.getValueType(), 16); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI); + // Emit a store to the stack slot. + InChain = DAG.getStore(InChain, dl, Entry.Node, StackPtr, PtrInfo, + false, false, 16); + Entry.Node = StackPtr; + Entry.Ty = PointerType::get(Entry.Ty,0); + Entry.isSExt = false; + Entry.isZExt = false; + } Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + Type *RetTy; + if (Win64 && RetVT.getSizeInBits() == 128) + RetTy = static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()); + else + RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); - return CallInfo.first; + SDValue ret = CallInfo.first; + if (isa<VectorType>(RetTy)) + ret = DAG.getNode(ISD::BITCAST, dl, RetVT, ret); + return ret; } Index: test/CodeGen/X86/mod128.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/mod128.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64 + +define i128 @mod128(i128 %x) nounwind { + ; X86-64: movl $3, %edx + ; X86-64: xorl %ecx, %ecx + ; X86-64: callq __modti3 + + ; WIN64-NOT: movl $3, %r8d + ; WIN64-NOT: xorl %r9d, %r9d + ; WIN64: leaq 32(%rsp), %rdx + ; WIN64: movq $0, 40(%rsp) + ; WIN64: movq $3, 32(%rsp) + ; WIN64: callq __modti3 + ; WIN64: movd %xmm0, %rax + ; WIN64: punpckhqdq %xmm0, %xmm0 + ; WIN64: movd %xmm0, %rdx + 
; WIN64: leaq 72(%rsp), %rsp + + %1 = srem 
i128 %x, 3 + ret i128 %1 +}