Index: include/llvm/IR/CallingConv.h =================================================================== --- include/llvm/IR/CallingConv.h +++ include/llvm/IR/CallingConv.h @@ -205,6 +205,12 @@ /// which have an "optimized" convention using additional registers. MSP430_BUILTIN = 94, + /// \brief The C convention as implemented on Windows/arm64. This + /// is similar to the normal C (AAPCS) calling convention for normal + /// functions, but floats are passed in integer registers to variadic + /// functions. + AArch64_Win64 = 95, + /// The highest possible calling convention ID. Must be some 2^k - 1. MaxID = 1023 }; Index: lib/AsmParser/LLLexer.cpp =================================================================== --- lib/AsmParser/LLLexer.cpp +++ lib/AsmParser/LLLexer.cpp @@ -606,6 +606,7 @@ KEYWORD(amdgpu_ps); KEYWORD(amdgpu_cs); KEYWORD(amdgpu_kernel); + KEYWORD(aarch64_win64cc); KEYWORD(cc); KEYWORD(c); Index: lib/AsmParser/LLParser.cpp =================================================================== --- lib/AsmParser/LLParser.cpp +++ lib/AsmParser/LLParser.cpp @@ -1687,6 +1687,7 @@ /// ::= 'amdgpu_ps' /// ::= 'amdgpu_cs' /// ::= 'amdgpu_kernel' +/// ::= 'aarch64_win64cc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1729,6 +1730,7 @@ case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break; case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break; case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break; + case lltok::kw_aarch64_win64cc: CC = CallingConv::AArch64_Win64; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); Index: lib/AsmParser/LLToken.h =================================================================== --- lib/AsmParser/LLToken.h +++ lib/AsmParser/LLToken.h @@ -158,6 +158,7 @@ kw_amdgpu_ps, kw_amdgpu_cs, kw_amdgpu_kernel, + kw_aarch64_win64cc, // Attributes: kw_attributes, Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -378,6 +378,7 @@ case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break; case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break; case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break; + case CallingConv::AArch64_Win64: Out << "aarch64_win64cc"; break; } } Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -41,6 +41,10 @@ // | | // |-----------------------------------| // | | +// | (Win64 only) varargs from reg | +// | | +// |-----------------------------------| +// | | // | prev_fp, prev_lr | // | (a.k.a. "frame record") | // |-----------------------------------| <- fp(=x29) @@ -463,6 +467,8 @@ AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); bool HasFP = hasFP(MF); + unsigned GPRSaveSize = alignTo(AFI->getVarArgsGPRSize(), 16); + bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. @@ -473,6 +479,10 @@ if (MF.getFunction()->getCallingConv() == CallingConv::GHC) return; + if (GPRSaveSize && IsWin64) + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -GPRSaveSize, TII, + MachineInstr::FrameSetup); + int NumBytes = (int)MFI.getStackSize(); if (!AFI->hasStackFrame()) { assert(!HasFP && "unexpected function without stack frame but with FP"); @@ -704,6 +714,7 @@ } int NumBytes = MFI.getStackSize(); const AArch64FunctionInfo *AFI = MF.getInfo(); + bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -728,6 +739,9 @@ ArgumentPopSize = AFI->getArgumentStackToRestore(); } + if (IsWin64) + ArgumentPopSize += alignTo(AFI->getVarArgsGPRSize(), 16); + // The stack frame should be like below, // // ---------------------- --- Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -541,6 +541,7 @@ SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2650,6 +2650,7 @@ case CallingConv::PreserveMost: case CallingConv::CXX_FAST_TLS: case CallingConv::Swift: + case CallingConv::AArch64_Win64: if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; @@ -2668,6 +2669,7 @@ SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); // Assign locations to all of the incoming arguments. SmallVector ArgLocs; @@ -2824,10 +2826,12 @@ // varargs AArch64FunctionInfo *FuncInfo = MF.getInfo(); if (isVarArg) { - if (!Subtarget->isTargetDarwin()) { + if (!Subtarget->isTargetDarwin() || IsWin64) { // The AAPCS variadic function ABI is identical to the non-variadic // one. As a result there may be more arguments in registers and we should // save them for future reference. + // For Win64 variadic functions, all float arguments are passed in integer + // registers. saveVarArgRegisters(CCInfo, DAG, DL, Chain); } @@ -2869,6 +2873,7 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo(); auto PtrVT = getPointerTy(DAG.getDataLayout()); + bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()); SmallVector MemOps; @@ -2881,7 +2886,10 @@ unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR); int GPRIdx = 0; if (GPRSaveSize != 0) { - GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); + if (IsWin64) + GPRIdx = MFI.CreateFixedObject(GPRSaveSize, GPRSaveSize & 15, false); + else + GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false); SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT); @@ -2890,6 +2898,8 @@ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64); SDValue Store = DAG.getStore( Val.getValue(1), DL, Val, FIN, + IsWin64 ? + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), GPRIdx, 8) : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8)); MemOps.push_back(Store); FIN = @@ -2899,7 +2909,8 @@ FuncInfo->setVarArgsGPRIndex(GPRIdx); FuncInfo->setVarArgsGPRSize(GPRSaveSize); - if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasFPARMv8() && + !Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) { static const MCPhysReg FPRArgRegs[] = { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7}; @@ -4491,6 +4502,19 @@ MachinePointerInfo(SV)); } +SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op, + SelectionDAG &DAG) const { + AArch64FunctionInfo *FuncInfo = + DAG.getMachineFunction().getInfo(); + + SDLoc DL(Op); + SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), + getPointerTy(DAG.getDataLayout())); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), + MachinePointerInfo(SV)); +} + SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const { // The layout of the va_list struct is specified in the AArch64 Procedure Call @@ -4562,6 +4586,10 @@ SDValue AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + + if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv())) + return LowerWin64_VASTART(Op, DAG); return Subtarget->isTargetDarwin() ? LowerDarwin_VASTART(Op, DAG) : LowerAAPCS_VASTART(Op, DAG); } @@ -10783,7 +10811,7 @@ unsigned AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const { - if (Subtarget->isTargetDarwin()) + if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) return getPointerTy(DL).getSizeInBits(); return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32; Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -297,6 +297,17 @@ bool enableEarlyIfConversion() const override; std::unique_ptr getCustomPBQPConstraints() const override; + + bool isCallingConvWin64(CallingConv::ID CC) const { + switch (CC) { + case CallingConv::C: + return isTargetWindows(); + case CallingConv::AArch64_Win64: + return true; + default: + return false; + } + } }; } // End llvm namespace