diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -235,6 +235,16 @@
   /// Used for M68k interrupt routines.
   M68k_INTR = 101,
 
+  /// Calling convention used in the ARM64EC ABI to implement calls between
+  /// x64 code and thunks. This is basically the x64 calling convention using
+  /// ARM64 register names. The first parameter is mapped to x9.
+  ARM64EC_Thunk_X64 = 102,
+
+  /// Calling convention used in the ARM64EC ABI to implement calls between
+  /// ARM64 code and thunks. This is just the ARM64 calling convention,
+  /// except that the first parameter is mapped to x9.
+  ARM64EC_Thunk_Native = 103,
+
   /// The highest possible ID. Must be some 2^k - 1.
   MaxID = 1023
 };
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -22,6 +22,12 @@
 bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                                CCValAssign::LocInfo LocInfo,
                                ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT,
+                              CCValAssign::LocInfo LocInfo,
+                              ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_Arm64EC_Thunk_Native(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                     CCValAssign::LocInfo LocInfo,
+                                     ISD::ArgFlagsTy ArgFlags, CCState &State);
 bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
                                  CCValAssign::LocInfo LocInfo,
                                  ISD::ArgFlagsTy ArgFlags, CCState &State);
@@ -46,6 +52,9 @@
 bool RetCC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool RetCC_AArch64_Arm64EC_Thunk(unsigned ValNo, MVT ValVT, MVT LocVT,
+                                 CCValAssign::LocInfo LocInfo,
+                                 ISD::ArgFlagsTy ArgFlags, CCState &State);
 bool RetCC_AArch64_WebKit_JS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -202,6 +202,118 @@ CCIfType<[i32, i64], CCAssignToStack<8, 8>>, ]>; +// Arm64EC thunks use a calling convention that's precisely the x64 calling +// convention, except that the registers have different names, and the callee +// address is passed in X9. +let Entry = 1 in +def CC_AArch64_Arm64EC_Thunk : CallingConv<[ + // Byval aggregates are passed by pointer + CCIfByVal>, + + // Promote i1/v1i1 arguments to i8. + CCIfType<[i1, v1i1], CCPromoteToType>, + + // The 'nest' parameter, if any, is passed in R10 (X4). + CCIfNest>, + + // A SwiftError is passed in R12 (X19). + CCIfSwiftError>>, + + // Pass SwiftSelf in R13 (X20). + CCIfSwiftSelf>>, + + // Pass SwiftAsync in an otherwise callee saved register so that calls to + // normal functions don't need to save it somewhere. + CCIfSwiftAsync>>, + + // The 'CFGuardTarget' parameter, if any, is passed in RAX (R8). + CCIfCFGuardTarget>, + + // 128 bit vectors are passed by pointer + CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCPassIndirect>, + + // 256 bit vectors are passed by pointer + CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64], CCPassIndirect>, + + // 512 bit vectors are passed by pointer + CCIfType<[v64i8, v32i16, v16i32, v32f16, v16f32, v8f64, v8i64], CCPassIndirect>, + + // Long doubles are passed by pointer + CCIfType<[f80], CCPassIndirect>, + + // The first 4 MMX vector arguments are passed in GPRs. + CCIfType<[x86mmx], CCBitConvertToType>, + + // The first 4 FP/Vector arguments are passed in XMM registers. 
+ CCIfType<[f16], + CCAssignToRegWithShadow<[H0, H1, H2, H3], + [X0, X1, X2, X2]>>, + CCIfType<[f32], + CCAssignToRegWithShadow<[S0, S1, S2, S3], + [X0, X1, X2, X2]>>, + CCIfType<[f64], + CCAssignToRegWithShadow<[D0, D1, D2, D3], + [X0, X1, X2, X2]>>, + + // The first 4 integer arguments are passed in integer registers. + CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3], + [Q0, Q1, Q2, Q3]>>, + + // Arm64EC thunks: the first argument is always a pointer to the destination + // address, stored in x9. + CCIfType<[i64], CCAssignToReg<[X9]>>, + + CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3], + [Q0, Q1, Q2, Q3]>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i8, i16, i32, i64, f16, f32, f64], CCAssignToStack<8, 8>> +]>; + +// The native side of ARM64EC thunks +let Entry = 1 in +def CC_AArch64_Arm64EC_Thunk_Native : CallingConv<[ + CCIfType<[i64], CCAssignToReg<[X9]>>, + CCDelegateTo +]>; + +let Entry = 1 in +def RetCC_AArch64_Arm64EC_Thunk : CallingConv<[ + // The X86-Win64 calling convention always returns __m64 values in RAX. + CCIfType<[x86mmx], CCBitConvertToType>, + + // Otherwise, everything is the same as 'normal' X86-64 C CC. + + // The X86-64 calling convention always returns FP values in XMM0. + CCIfType<[f16], CCAssignToReg<[H0, H1]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1]>>, + CCIfType<[f64], CCAssignToReg<[D0, D1]>>, + CCIfType<[f128], CCAssignToReg<[Q0, Q1]>>, + + CCIfSwiftError>>, + + // Scalar values are returned in AX first, then DX. For i8, the ABI + // requires the values to be in AL and AH, however this code uses AL and DL + // instead. This is because using AH for the second register conflicts with + // the way LLVM does multiple return values -- a return of {i16,i8} would end + // up in AX and AH, which overlap. 
Front-ends wishing to conform to the ABI + // for functions that return two i8 values are currently expected to pack the + // values into an i16 (which uses AX, and thus AL:AH). + // + // For code that doesn't care about the ABI, we allow returning more than two + // integer values in registers. + CCIfType<[i1, i8, i16], CCPromoteToType>, + CCIfType<[i32], CCAssignToReg<[W8, W1, W0]>>, + CCIfType<[i64], CCAssignToReg<[X8, X1, X0]>>, + + // Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3 + // can only be used by ABI non-compliant code. If the target doesn't have XMM + // registers, it won't have vector types. + CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], + CCAssignToReg<[Q0, Q1, Q2, Q3]>> +]>; + // Windows Control Flow Guard checks take a single argument (the target function // address) and have no return value. let Entry = 1 in diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5821,25 +5821,35 @@ return CC_AArch64_DarwinPCS; return Subtarget->isTargetILP32() ? 
CC_AArch64_DarwinPCS_ILP32_VarArg
                                       : CC_AArch64_DarwinPCS_VarArg;
-  case CallingConv::Win64:
-    if (IsVarArg) {
-      if (Subtarget->isWindowsArm64EC())
-        return CC_AArch64_Arm64EC_VarArg;
-      return CC_AArch64_Win64_VarArg;
-    }
-    return CC_AArch64_AAPCS;
-  case CallingConv::CFGuard_Check:
-    return CC_AArch64_Win64_CFGuard_Check;
-  case CallingConv::AArch64_VectorCall:
-  case CallingConv::AArch64_SVE_VectorCall:
-    return CC_AArch64_AAPCS;
+  case CallingConv::Win64:
+    if (IsVarArg) {
+      if (Subtarget->isWindowsArm64EC())
+        return CC_AArch64_Arm64EC_VarArg;
+      return CC_AArch64_Win64_VarArg;
+    }
+    return CC_AArch64_AAPCS;
+  case CallingConv::CFGuard_Check:
+    return CC_AArch64_Win64_CFGuard_Check;
+  case CallingConv::AArch64_VectorCall:
+  case CallingConv::AArch64_SVE_VectorCall:
+    return CC_AArch64_AAPCS;
+  case CallingConv::ARM64EC_Thunk_X64:
+    return CC_AArch64_Arm64EC_Thunk;
+  case CallingConv::ARM64EC_Thunk_Native:
+    return CC_AArch64_Arm64EC_Thunk_Native;
   }
 }
 
 CCAssignFn *
 AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
-  return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
-                                      : RetCC_AArch64_AAPCS;
+  switch (CC) {
+  default:
+    return RetCC_AArch64_AAPCS;
+  case CallingConv::WebKit_JS:
+    return RetCC_AArch64_WebKit_JS;
+  case CallingConv::ARM64EC_Thunk_X64:
+    return RetCC_AArch64_Arm64EC_Thunk;
+  }
 }
 
 SDValue AArch64TargetLowering::LowerFormalArguments(
@@ -5850,6 +5860,8 @@
   const Function &F = MF.getFunction();
   MachineFrameInfo &MFI = MF.getFrameInfo();
   bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+  bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 ||
+                    (isVarArg && Subtarget->isWindowsArm64EC());
   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   SmallVector<ISD::OutputArg, 4> Outs;
@@ -5994,10 +6006,14 @@
       SDValue FIN;
       MachinePointerInfo PtrInfo;
-      if (isVarArg && Subtarget->isWindowsArm64EC()) {
-        // In the ARM64EC varargs convention, fixed arguments on the stack are
-        // accessed relative to x4, not sp.
+      if (StackViaX4) {
+        // In both the ARM64EC varargs convention and the thunk convention,
+        // arguments on the stack are accessed relative to x4, not sp. In
+        // the thunk convention, there's an additional offset of 32 bytes
+        // to account for the shadow store.
         unsigned ObjOffset = ArgOffset + BEAlign;
+        if (CallConv == CallingConv::ARM64EC_Thunk_X64)
+          ObjOffset += 32;
         Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
         SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
         FIN = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
@@ -6145,9 +6161,12 @@
   // On Windows, InReg pointers must be returned, so record the pointer in a
   // virtual register at the start of the function so it can be returned in the
   // epilogue.
-  if (IsWin64) {
+  if (IsWin64 ||
+      MF.getFunction().getCallingConv() == CallingConv::ARM64EC_Thunk_X64) {
     for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
-      if (Ins[I].Flags.isInReg()) {
+      if (MF.getFunction().getCallingConv() == CallingConv::ARM64EC_Thunk_X64
+              ? Ins[I].Flags.isSRet()
+              : Ins[I].Flags.isInReg()) {
         assert(!FuncInfo->getSRetReturnReg());
 
         MVT PtrTy = getPointerTy(DAG.getDataLayout());
@@ -6382,6 +6401,11 @@
   const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
 
+  // For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack
+  // for the shadow store.
+  if (CalleeCC == CallingConv::ARM64EC_Thunk_X64)
+    CCInfo.AllocateStack(32, Align(16));
+
   unsigned NumArgs = Outs.size();
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT ArgVT = Outs[i].VT;
@@ -7217,6 +7241,8 @@
                        getPointerTy(MF.getDataLayout()));
 
     unsigned RetValReg = AArch64::X0;
+    if (CallConv == CallingConv::ARM64EC_Thunk_X64)
+      RetValReg = AArch64::X8;
     Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
     Flag = Chain.getValue(1);