Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/ConstantFolding.h"
@@ -6989,10 +6990,30 @@
 /// migrated to using LowerCall, this hook should be integrated into SDISel.
 std::pair<SDValue, SDValue>
 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
+  // On 64-bit targets that use the MSVC runtime (Win64), 128-bit scalar return
+  // values and by-value arguments wider than 64 bits are rewritten below.
+  Triple Trip(CLI.DAG.getTarget().getTargetTriple());
+  bool Win64 = Trip.isArch64Bit() && Trip.isOSMSVCRT();
+
   // Handle the incoming return values from the call.
   CLI.Ins.clear();
   SmallVector<EVT, 4> RetTys;
   ComputeValueVTs(*this, CLI.RetTy, RetTys);
+  bool RetIsM128 = false;
+  EVT OrigRetVT;
+  if (Win64 && RetTys.size() == 1) {
+    EVT VT = RetTys[0];
+    if ((VT.isFloatingPoint() || VT.isInteger()) &&
+        VT.getSizeInBits() == 128) {
+      // Change the return type to something LLVM can represent and that we
+      // know will end up in the XMM0 register. The original type is restored
+      // with a bitcast at the end of this function.
+      RetTys[0] = static_cast<EVT>(MVT::v2i64);
+      CLI.IsTailCall = false;
+      RetIsM128 = true;
+      OrigRetVT = VT;
+    }
+  }
   for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
     EVT VT = RetTys[I];
     MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
@@ -7018,7 +7039,25 @@
   ArgListTy &Args = CLI.Args;
   for (unsigned i = 0, e = Args.size(); i != e; ++i) {
     SmallVector<EVT, 4> ValueVTs;
-    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+    TargetLowering::ArgListEntry Entry = Args[i];
+    if (Win64 && Entry.Node.getValueType().getSizeInBits() > 64) {
+      // Spill the wide argument to a stack temporary (minimum alignment 16)
+      // and pass a pointer to that slot in its place.
+      SDValue StackPtr =
+          CLI.DAG.CreateStackTemporary(Entry.Node.getValueType(), 16);
+      int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+      MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+      // Emit a store to the stack slot.
+      CLI.Chain = CLI.DAG.getStore(CLI.Chain, CLI.DL, Entry.Node, StackPtr,
+                                   PtrInfo, false, false, 16);
+      Entry.Node = StackPtr;
+      Entry.Ty = PointerType::get(Entry.Ty, 0);
+      Entry.isSExt = false;
+      Entry.isZExt = false;
+      Args[i] = Entry;
+      CLI.IsTailCall = false;
+    }
+    ComputeValueVTs(*this, Entry.Ty, ValueVTs);
     for (unsigned Value = 0, NumValues = ValueVTs.size();
          Value != NumValues; ++Value) {
       EVT VT = ValueVTs[Value];
@@ -7160,6 +7199,9 @@
   SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
                                 CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
                                 &ReturnValues[0], ReturnValues.size());
+  // Convert the v2i64 placeholder back to the original 128-bit return type.
+  if (RetIsM128)
+    Res = CLI.DAG.getNode(ISD::BITCAST, CLI.DL, OrigRetVT, Res);
   return std::make_pair(Res, CLI.Chain);
 }
 
Index: test/CodeGen/X86/mod128.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/mod128.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X86-64
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+
+define i128 @mod128(i128 %x) nounwind {
+  ; X86-64: movl $3, %edx
+  ; X86-64: xorl %ecx, %ecx
+  ; X86-64: callq __modti3
+
+  ; WIN64-NOT: movl $3, %r8d
+  ; WIN64-NOT: xorl %r9d, %r9d
+  ; WIN64: callq __modti3
+
+  %1 = srem i128 %x, 3
+  ret i128 %1
+}