diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -503,6 +503,11 @@
 
 // Bit position of rounding mode bits in FPCR.
 const unsigned RoundingBitsPos = 22;
+
+// Registers used to pass function arguments.
+const ArrayRef<MCPhysReg> getGPRArgRegs();
+const ArrayRef<MCPhysReg> getFPRArgRegs();
+
 } // namespace AArch64
 
 class AArch64Subtarget;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -140,6 +140,17 @@
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
 
+static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+                                       AArch64::X3, AArch64::X4, AArch64::X5,
+                                       AArch64::X6, AArch64::X7};
+static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+                                       AArch64::Q3, AArch64::Q4, AArch64::Q5,
+                                       AArch64::Q6, AArch64::Q7};
+
+const ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }
+
+const ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }
+
 static inline EVT getPackedSVEVectorVT(EVT VT) {
   switch (VT.getSimpleVT().SimpleTy) {
   default:
@@ -6693,10 +6704,8 @@
 
   SmallVector<SDValue, 8> MemOps;
 
-  static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
-                                          AArch64::X3, AArch64::X4, AArch64::X5,
-                                          AArch64::X6, AArch64::X7 };
-  unsigned NumGPRArgRegs = std::size(GPRArgRegs);
+  auto GPRArgRegs = AArch64::getGPRArgRegs();
+  unsigned NumGPRArgRegs = GPRArgRegs.size();
   if (Subtarget->isWindowsArm64EC()) {
     // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
     // functions.
@@ -6746,10 +6755,8 @@
   FuncInfo->setVarArgsGPRSize(GPRSaveSize);
 
   if (Subtarget->hasFPARMv8() && !IsWin64) {
-    static const MCPhysReg FPRArgRegs[] = {
-        AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
-        AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
-    static const unsigned NumFPRArgRegs = std::size(FPRArgRegs);
+    auto FPRArgRegs = AArch64::getFPRArgRegs();
+    const unsigned NumFPRArgRegs = FPRArgRegs.size();
     unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
 
     unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -66,6 +66,10 @@
   using MemHandler =
       std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
 
+  void saveVarArgRegisters(MachineIRBuilder &MIRBuilder,
+                           CallLowering::IncomingValueHandler &Handler,
+                           CCState &CCInfo) const;
+
   bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
                      SmallVectorImpl<ArgInfo> &OutArgs) const;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -15,6 +15,7 @@
 #include "AArch64CallLowering.h"
 #include "AArch64ISelLowering.h"
 #include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallVector.h"
@@ -546,6 +547,88 @@
   return false;
 }
 
+void AArch64CallLowering::saveVarArgRegisters(
+    MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
+    CCState &CCInfo) const {
+  auto GPRArgRegs = AArch64::getGPRArgRegs();
+  auto FPRArgRegs = AArch64::getFPRArgRegs();
+
+  MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  bool IsWin64CC =
+      Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
+  const LLT p0 = LLT::pointer(0, 64);
+  const LLT s64 = LLT::scalar(64);
+
+  unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
+  unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
+
+  unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
+  int GPRIdx = 0;
+  if (GPRSaveSize != 0) {
+    if (IsWin64CC) {
+      GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
+                                     -static_cast<int>(GPRSaveSize), false);
+    } else
+      GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
+
+    auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
+    auto Offset =
+        MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);
+
+    for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
+      Register Val = MRI.createGenericVirtualRegister(s64);
+      Handler.assignValueToReg(
+          Val, GPRArgRegs[i],
+          CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
+                              GPRArgRegs[i], MVT::i64, CCValAssign::Full));
+      auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
+                                 MF, GPRIdx, (i - FirstVariadicGPR) * 8)
+                           : MachinePointerInfo::getStack(MF, i * 8);
+      MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+
+      FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+                                   FIN.getReg(0), Offset);
+    }
+  }
+  FuncInfo->setVarArgsGPRIndex(GPRIdx);
+  FuncInfo->setVarArgsGPRSize(GPRSaveSize);
+
+  if (Subtarget.hasFPARMv8() && !IsWin64CC) {
+    unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
+
+    unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
+    int FPRIdx = 0;
+    if (FPRSaveSize != 0) {
+      FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
+
+      auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
+      auto Offset =
+          MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);
+
+      for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
+        Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
+        Handler.assignValueToReg(
+            Val, FPRArgRegs[i],
+            CCValAssign::getReg(
+                i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
+                MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));
+
+        auto MPO = MachinePointerInfo::getStack(MF, i * 16);
+        MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+
+        FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+                                     FIN.getReg(0), Offset);
+      }
+    }
+    FuncInfo->setVarArgsFPRIndex(FPRIdx);
+    FuncInfo->setVarArgsFPRSize(FPRSaveSize);
+  }
+}
+
 bool AArch64CallLowering::lowerFormalArguments(
     MachineIRBuilder &MIRBuilder, const Function &F,
     ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
@@ -553,6 +636,9 @@
   MachineBasicBlock &MBB = MIRBuilder.getMBB();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   auto &DL = F.getParent()->getDataLayout();
+  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+  // TODO: Support Arm64EC
+  bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();
 
   SmallVector<ArgInfo, 8> SplitArgs;
   SmallVector<std::pair<Register, Register>> BoolArgs;
@@ -598,13 +684,14 @@
   MIRBuilder.setInstr(*MBB.begin());
 
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
-  CCAssignFn *AssignFn =
-      TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+  CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
 
   AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
   FormalArgHandler Handler(MIRBuilder, MRI);
-  if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
-                                     F.getCallingConv(), F.isVarArg()))
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+  if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
+      !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
     return false;
 
   if (!BoolArgs.empty()) {
@@ -622,10 +709,14 @@
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   uint64_t StackOffset = Assigner.StackOffset;
   if (F.isVarArg()) {
-    auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
-    if (!Subtarget.isTargetDarwin()) {
-      // FIXME: we need to reimplement saveVarArgsRegisters from
-      // AArch64ISelLowering.
+    if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
+      // The AAPCS variadic function ABI is identical to the non-variadic
+      // one. As a result there may be more arguments in registers and we should
+      // save them for future reference.
+      // Win64 variadic functions also pass arguments in registers, but all
+      // float arguments are passed in integer registers.
+      saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
+    } else if (Subtarget.isWindowsArm64EC()) {
       return false;
     }
 
@@ -657,7 +748,6 @@
   // in this function later.
   FuncInfo->setBytesInStackArgArea(StackOffset);
 
-  auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   if (Subtarget.hasCustomCallingConv())
     Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -1940,10 +1940,18 @@
 
   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
 
+  int FrameIdx = FuncInfo->getVarArgsStackIndex();
+  if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
+          MF.getFunction().getCallingConv())) {
+    FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
+                   ? FuncInfo->getVarArgsGPRIndex()
+                   : FuncInfo->getVarArgsStackIndex();
+  }
+
   auto MIB =
       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
           .addDef(ArgsAddrReg)
-          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
+          .addFrameIndex(FrameIdx)
           .addImm(0)
           .addImm(0);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
+; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL
+
+define void @va(i32 %count, half %f, ...) nounwind {
+; CHECK-LABEL: va:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #176
+; CHECK-NEXT:    stp x4, x5, [sp, #144]
+; CHECK-NEXT:    stp x2, x3, [sp, #128]
+; CHECK-NEXT:    str x1, [sp, #120]
+; CHECK-NEXT:    stp x6, x7, [sp, #160]
+; CHECK-NEXT:    stp q1, q2, [sp]
+; CHECK-NEXT:    stp q3, q4, [sp, #32]
+; CHECK-NEXT:    stp q5, q6, [sp, #64]
+; CHECK-NEXT:    str q7, [sp, #96]
+; CHECK-NEXT:    add sp, sp, #176
+; CHECK-NEXT:    ret
+;
+; GISEL-LABEL: va:
+; GISEL:       // %bb.0: // %entry
+; GISEL-NEXT:    sub sp, sp, #176
+; GISEL-NEXT:    stp x1, x2, [sp, #120]
+; GISEL-NEXT:    stp x3, x4, [sp, #136]
+; GISEL-NEXT:    stp x5, x6, [sp, #152]
+; GISEL-NEXT:    str x7, [sp, #168]
+; GISEL-NEXT:    stp q1, q2, [sp]
+; GISEL-NEXT:    stp q3, q4, [sp, #32]
+; GISEL-NEXT:    stp q5, q6, [sp, #64]
+; GISEL-NEXT:    str q7, [sp, #96]
+; GISEL-NEXT:    add sp, sp, #176
+; GISEL-NEXT:    ret
+entry:
+  ret void
+}
diff --git a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
--- a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s --global-isel=1 -mtriple=aarch64-apple-darwin | FileCheck %s --check-prefix=DARWIN
 
 define win64cc void @pass_va(i32 %count, ...) nounwind {
 ; CHECK-LABEL: pass_va:
@@ -17,6 +18,12 @@
 ; CHECK-NEXT:    ldp x30, x18, [sp, #16] // 16-byte Folded Reload
 ; CHECK-NEXT:    add sp, sp, #96
 ; CHECK-NEXT:    ret
+;
+; DARWIN:       ; %bb.0: ; %entry
+; DARWIN-DAG:    stp x3, x4, [sp, #56]
+; DARWIN-DAG:    stp x1, x2, [sp, #40]
+; DARWIN-DAG:    stp x5, x6, [sp, #72]
+; DARWIN-DAG:    str x7, [sp, #88]
 entry:
   %ap = alloca ptr, align 8
   call void @llvm.va_start(ptr %ap)
@@ -39,6 +46,16 @@
 ; CHECK-NEXT:    str x8, [sp, #8]
 ; CHECK-NEXT:    ldr x18, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: _f9:
+; DARWIN:       ; %bb.0: ; %entry
+; DARWIN-NEXT:    str x18, [sp, #-16]! ; 8-byte Folded Spill
+; DARWIN-NEXT:    add x8, sp, #8
+; DARWIN-NEXT:    add x9, sp, #24
+; DARWIN-NEXT:    str x9, [x8]
+; DARWIN-NEXT:    ldr x0, [sp, #8]
+; DARWIN-NEXT:    ldr x18, [sp], #16 ; 8-byte Folded Reload
+; DARWIN-NEXT:    ret
 entry:
   %ap = alloca ptr, align 8
   call void @llvm.va_start(ptr %ap)
@@ -55,6 +72,16 @@
 ; CHECK-NEXT:    str x8, [sp, #8]
 ; CHECK-NEXT:    ldr x18, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: _f8:
+; DARWIN:       ; %bb.0: ; %entry
+; DARWIN-NEXT:    str x18, [sp, #-16]! ; 8-byte Folded Spill
+; DARWIN-NEXT:    add x8, sp, #8
+; DARWIN-NEXT:    add x9, sp, #16
+; DARWIN-NEXT:    str x9, [x8]
+; DARWIN-NEXT:    ldr x0, [sp, #8]
+; DARWIN-NEXT:    ldr x18, [sp], #16 ; 8-byte Folded Reload
+; DARWIN-NEXT:    ret
 entry:
   %ap = alloca ptr, align 8
   call void @llvm.va_start(ptr %ap)
@@ -72,6 +99,17 @@
 ; CHECK-NEXT:    str x8, [sp, #8]
 ; CHECK-NEXT:    ldr x18, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; DARWIN-LABEL: _f7:
+; DARWIN:       ; %bb.0: ; %entry
+; DARWIN-NEXT:    str x18, [sp, #-32]! ; 8-byte Folded Spill
+; DARWIN-NEXT:    add x8, sp, #8
+; DARWIN-NEXT:    add x9, sp, #24
+; DARWIN-NEXT:    str x7, [sp, #24]
+; DARWIN-NEXT:    str x9, [x8]
+; DARWIN-NEXT:    ldr x0, [sp, #8]
+; DARWIN-NEXT:    ldr x18, [sp], #32 ; 8-byte Folded Reload
+; DARWIN-NEXT:    ret
 entry:
   %ap = alloca ptr, align 8
   call void @llvm.va_start(ptr %ap)
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
--- a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
@@ -3,6 +3,15 @@
 ; RUN: llc < %s -mtriple=aarch64-windows -verify-machineinstrs -O0 -fast-isel | FileCheck %s --check-prefixes=O0,FASTISEL
 ; RUN: llc < %s -mtriple=aarch64-windows -verify-machineinstrs -O0 -global-isel | FileCheck %s --check-prefixes=O0,GISEL
 
+; Check that compilation of non-vararg functions is not broken
+define win64cc float @foo(float %arg) nounwind {
+; GISEL-LABEL: foo:
+; GISEL-NEXT:  // %bb.0: // %entry
+; GISEL-NEXT:    ret
+entry:
+  ret float %arg
+}
+
 define win64cc void @float_va_fn(float %a, i32 %b, ...) nounwind {
 ; DAGISEL-LABEL: float_va_fn:
 ; DAGISEL:       // %bb.0: // %entry
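
For anyone who wants to exercise the new path by hand, below is a minimal reproducer in the style of the tests above. It is a sketch, not part of the patch: the function name and RUN line are illustrative. With this change, --global-isel=1 should lower the formal arguments and va_start of a win64cc variadic function through saveVarArgRegisters instead of falling back to SelectionDAG:

; RUN: llc < %s --global-isel=1 -mtriple=aarch64-windows -verify-machineinstrs

define win64cc i64 @first_vararg(i32 %count, ...) nounwind {
entry:
  %ap = alloca ptr, align 8
  ; With the selector change above, va_start for Win64 varargs points into
  ; the GPR save area created by saveVarArgRegisters.
  call void @llvm.va_start(ptr %ap)
  %v = va_arg ptr %ap, i64   ; reads x1, the first unnamed argument
  call void @llvm.va_end(ptr %ap)
  ret i64 %v
}

declare void @llvm.va_start(ptr)
declare void @llvm.va_end(ptr)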