Index: include/llvm/IR/CallingConv.h
===================================================================
--- include/llvm/IR/CallingConv.h
+++ include/llvm/IR/CallingConv.h
@@ -205,6 +205,12 @@
     /// which have an "optimized" convention using additional registers.
     MSP430_BUILTIN = 94,
 
+    /// \brief The C convention as implemented on Windows/arm64. This
+    /// is similar to the normal C (AAPCS) calling convention for normal
+    /// functions, but floats are passed in integer registers to variadic
+    /// functions.
+    AArch64_Win64 = 95,
+
     /// The highest possible calling convention ID. Must be some 2^k - 1.
     MaxID = 1023
   };
Index: lib/AsmParser/LLLexer.cpp
===================================================================
--- lib/AsmParser/LLLexer.cpp
+++ lib/AsmParser/LLLexer.cpp
@@ -606,6 +606,7 @@
   KEYWORD(amdgpu_ps);
   KEYWORD(amdgpu_cs);
   KEYWORD(amdgpu_kernel);
+  KEYWORD(aarch64_win64cc);
 
   KEYWORD(cc);
   KEYWORD(c);
Index: lib/AsmParser/LLParser.cpp
===================================================================
--- lib/AsmParser/LLParser.cpp
+++ lib/AsmParser/LLParser.cpp
@@ -1687,6 +1687,7 @@
 ///   ::= 'amdgpu_ps'
 ///   ::= 'amdgpu_cs'
 ///   ::= 'amdgpu_kernel'
+///   ::= 'aarch64_win64cc'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
@@ -1729,6 +1730,7 @@
   case lltok::kw_amdgpu_ps:      CC = CallingConv::AMDGPU_PS; break;
   case lltok::kw_amdgpu_cs:      CC = CallingConv::AMDGPU_CS; break;
   case lltok::kw_amdgpu_kernel:  CC = CallingConv::AMDGPU_KERNEL; break;
+  case lltok::kw_aarch64_win64cc: CC = CallingConv::AArch64_Win64; break;
   case lltok::kw_cc: {
       Lex.Lex();
       return ParseUInt32(CC);
Index: lib/AsmParser/LLToken.h
===================================================================
--- lib/AsmParser/LLToken.h
+++ lib/AsmParser/LLToken.h
@@ -158,6 +158,7 @@
   kw_amdgpu_ps,
   kw_amdgpu_cs,
   kw_amdgpu_kernel,
+  kw_aarch64_win64cc,
 
   // Attributes:
   kw_attributes,
Index: lib/IR/AsmWriter.cpp
===================================================================
--- lib/IR/AsmWriter.cpp
+++ lib/IR/AsmWriter.cpp
@@ -378,6 +378,7 @@
   case CallingConv::AMDGPU_PS:     Out << "amdgpu_ps"; break;
   case CallingConv::AMDGPU_CS:     Out << "amdgpu_cs"; break;
   case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
+  case CallingConv::AArch64_Win64: Out << "aarch64_win64cc"; break;
   }
 }
Index: lib/Target/AArch64/AArch64FrameLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64FrameLowering.cpp
+++ lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -958,7 +958,8 @@
   unsigned GPRSaveSize = AFI->getVarArgsGPRSize();
   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-  if (Subtarget.isTargetWindows())
+  bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv());
+  if (IsWin64)
     Offset -= alignTo(GPRSaveSize, 16);
 
   for (unsigned i = 0; i < Count; ++i) {
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2655,6 +2655,8 @@
     if (!Subtarget->isTargetDarwin())
       return CC_AArch64_AAPCS;
     return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
+  case CallingConv::AArch64_Win64:
+    return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
   }
 }
 
@@ -2670,6 +2672,7 @@
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
 
   // Assign locations to all of the incoming arguments.
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -2826,7 +2829,7 @@
   // varargs
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   if (isVarArg) {
-    if (!Subtarget->isTargetDarwin()) {
+    if (!Subtarget->isTargetDarwin() || IsWin64) {
       // The AAPCS variadic function ABI is identical to the non-variadic
       // one. As a result there may be more arguments in registers and we should
       // save them for future reference.
@@ -2873,6 +2876,7 @@
   MachineFrameInfo &MFI = MF.getFrameInfo();
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   auto PtrVT = getPointerTy(DAG.getDataLayout());
+  bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
 
   SmallVector<SDValue, 8> MemOps;
@@ -2885,7 +2889,7 @@
     unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
     int GPRIdx = 0;
     if (GPRSaveSize != 0) {
-      if (Subtarget->isTargetWindows())
+      if (IsWin64)
         GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
       else
         GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
@@ -2897,7 +2901,7 @@
       SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
       SDValue Store = DAG.getStore(
           Val.getValue(1), DL, Val, FIN,
-          Subtarget->isTargetWindows()
+          IsWin64
               ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
                                                   GPRIdx,
                                                   (i - FirstVariadicGPR) * 8)
@@ -2910,7 +2914,7 @@
     FuncInfo->setVarArgsGPRIndex(GPRIdx);
     FuncInfo->setVarArgsGPRSize(GPRSaveSize);
 
-    if (Subtarget->hasFPARMv8() && !Subtarget->isTargetWindows()) {
+    if (Subtarget->hasFPARMv8() && !IsWin64) {
       static const MCPhysReg FPRArgRegs[] = {
           AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
           AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
@@ -4588,7 +4592,9 @@
 
 SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
                                             SelectionDAG &DAG) const {
-  if (Subtarget->isTargetWindows())
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
     return LowerWin64_VASTART(Op, DAG);
   else if (Subtarget->isTargetDarwin())
     return LowerDarwin_VASTART(Op, DAG);
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -306,6 +306,17 @@
   bool enableEarlyIfConversion() const override;
 
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
+
+  bool isCallingConvWin64(CallingConv::ID CC) const {
+    switch (CC) {
+    case CallingConv::C:
+      return isTargetWindows();
+    case CallingConv::AArch64_Win64:
+      return true;
+    default:
+      return false;
+    }
+  }
 };
 } // End llvm namespace
Index: test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
@@ -0,0 +1,74 @@
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+
+define aarch64_win64cc void @pass_va(i32 %count, ...) nounwind {
+entry:
+; CHECK: sub sp, sp, #80
+; CHECK: add x8, sp, #24
+; CHECK: add x0, sp, #24
+; CHECK: stp x6, x7, [sp, #64]
+; CHECK: stp x4, x5, [sp, #48]
+; CHECK: stp x2, x3, [sp, #32]
+; CHECK: str x1, [sp, #24]
+; CHECK: stp x30, x8, [sp]
+; CHECK: bl other_func
+; CHECK: ldr x30, [sp], #80
+; CHECK: ret
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  call void @other_func(i8* %ap2)
+  ret void
+}
+
+declare void @other_func(i8*) local_unnamed_addr
+
+declare void @llvm.va_start(i8*) nounwind
+declare void @llvm.va_copy(i8*, i8*) nounwind
+
+; CHECK-LABEL: f9:
+; CHECK: sub sp, sp, #16
+; CHECK: add x8, sp, #24
+; CHECK: add x0, sp, #24
+; CHECK: str x8, [sp, #8]
+; CHECK: add sp, sp, #16
+; CHECK: ret
+define aarch64_win64cc i8* @f9(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
+
+; CHECK-LABEL: f8:
+; CHECK: sub sp, sp, #16
+; CHECK: add x8, sp, #16
+; CHECK: add x0, sp, #16
+; CHECK: str x8, [sp, #8]
+; CHECK: add sp, sp, #16
+; CHECK: ret
+define aarch64_win64cc i8* @f8(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
+
+; CHECK-LABEL: f7:
+; CHECK: sub sp, sp, #16
+; CHECK: add x8, sp, #8
+; CHECK: add x0, sp, #8
+; CHECK: stp x8, x7, [sp], #16
+; CHECK: ret
+define aarch64_win64cc i8* @f7(i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, ...) nounwind {
+entry:
+  %ap = alloca i8*, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %ap2 = load i8*, i8** %ap, align 8
+  ret i8* %ap2
+}
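
Usage sketch (illustration only, not part of the patch; the names @win64_variadic
and @caller are hypothetical): a frontend opts into the Windows ABI per-function
by tagging both the callee and the call site with the new convention. Because a
variadic aarch64_win64cc function selects CC_AArch64_Win64_VarArg, the variadic
double below is passed in an integer register rather than a SIMD/FP register,
even on a non-Windows target:

  ; Hypothetical IR, compiled with e.g. llc -mtriple=aarch64-linux-gnu.
  ; Declaration of a variadic function using the Win64 convention.
  declare aarch64_win64cc void @win64_variadic(i8*, ...)

  define void @caller(i8* %fmt, double %d) {
  entry:
    ; The call site must carry the same calling convention as the callee,
    ; otherwise the call is undefined behavior.
    call aarch64_win64cc void (i8*, ...) @win64_variadic(i8* %fmt, double %d)
    ret void
  }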