Index: llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -59,14 +59,38 @@
 } // end anonymous namespace
 
 Function *AArch64Arm64ECCallLowering::buildExitThunk(CallBase *CB) {
-  Type *RetTy = CB->getFunctionType()->getReturnType();
+  FunctionType *FT = CB->getFunctionType();
+  Type *RetTy = FT->getReturnType();
+  bool IsVarArg = FT->isVarArg();
+  Type *PtrTy = Type::getInt8PtrTy(M->getContext());
+  Type *I64Ty = Type::getInt64Ty(M->getContext());
+
   SmallVector<Type *> DefArgTypes;
   // The first argument to a thunk is the called function, stored in x9.
   // (Normally, we won't explicitly refer to this in the assembly; it just
   // gets passed on by the call.)
-  DefArgTypes.push_back(Type::getInt8PtrTy(M->getContext()));
-  for (unsigned i = 0; i < CB->arg_size(); ++i) {
-    DefArgTypes.push_back(CB->getArgOperand(i)->getType());
+  DefArgTypes.push_back(PtrTy);
+
+  if (IsVarArg) {
+    // We treat a variadic function's exit thunk as a normal function with
+    // the type:
+    //   rettype exitthunk(
+    //       ptr x9, ptr x0, i64 x1, i64 x2, i64 x3, ptr x4, i64 x5)
+    // which can cover every kind of variadic function.
+    // x9 holds the called function, as in a normal exit thunk.
+    // x0-x3 are the arguments passed in registers.
+    // x4 is the address of the arguments on the stack.
+    // x5 is the size of the arguments on the stack.
+    DefArgTypes.push_back(PtrTy);
+    for (int i = 0; i < 3; i++)
+      DefArgTypes.push_back(I64Ty);
+
+    DefArgTypes.push_back(PtrTy);
+    DefArgTypes.push_back(I64Ty);
+  } else {
+    for (unsigned i = 0; i < CB->arg_size(); ++i) {
+      DefArgTypes.push_back(CB->getArgOperand(i)->getType());
+    }
   }
   FunctionType *Ty = FunctionType::get(RetTy, DefArgTypes, false);
   Function *F =
@@ -139,12 +163,13 @@
     } else {
       Args.push_back(&Arg);
     }
-    ArgTypes.push_back(Args.back()->getType());
+    if (!IsVarArg)
+      ArgTypes.push_back(Args.back()->getType());
   }
   // FIXME: Transfer necessary attributes? sret? anything else?
   // FIXME: Try to share thunks. This probably involves simplifying the
   // argument types (translating all integers/pointers to i64, etc.)
-  auto *CallTy = FunctionType::get(X64RetType, ArgTypes, false);
+  auto *CallTy = FunctionType::get(X64RetType, ArgTypes, IsVarArg);
   Callee = IRB.CreateBitCast(Callee, CallTy->getPointerTo(0));
 
   CallInst *Call = IRB.CreateCall(CallTy, Callee, Args);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -907,6 +907,10 @@
   unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL,
                                   SelectionDAG &DAG, Register &Reg) const;
 
+  SDValue varArgCopyForExitThunk(SelectionDAG &DAG, SDLoc &DL, SDValue Chain,
+                                 SmallVector<SDValue, 32> &OutVals,
+                                 bool RetStack) const;
+
   SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                                bool isVarArg,
                                const SmallVectorImpl<ISD::InputArg> &Ins,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1469,6 +1469,13 @@
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
+
+  if (Subtarget->isWindowsArm64EC()) {
+    // FIXME: are there other intrinsics we need to add here?
+    setLibcallName(RTLIB::MEMCPY, "#memcpy");
+    setLibcallName(RTLIB::MEMSET, "#memset");
+    setLibcallName(RTLIB::MEMMOVE, "#memmove");
+  }
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -6564,7 +6571,8 @@
 
   // For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack
   // for the shadow store.
-  if (CalleeCC == CallingConv::ARM64EC_Thunk_X64)
+  // Variadic thunks allocate those 32 bytes in the dynamic allocation instead.
+  if (CalleeCC == CallingConv::ARM64EC_Thunk_X64 && !IsVarArg)
     CCInfo.AllocateStack(32, Align(16));
 
   unsigned NumArgs = Outs.size();
@@ -6824,6 +6832,59 @@
   return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
 }
 
+// A variadic function's exit thunk needs to allocate space at the bottom of
+// the current stack to hold the callee's actual stack arguments, and then
+// copy the caller's stack arguments into that allocation.
+SDValue AArch64TargetLowering::varArgCopyForExitThunk(
+    SelectionDAG &DAG, SDLoc &DL, SDValue Chain,
+    SmallVector<SDValue, 32> &OutVals, bool RetStack) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // Memory address of the arguments on the stack.
+  SDValue X4Stack = OutVals[OutVals.size() - 2];
+  // Size of the arguments on the stack.
+  SDValue X5Length = OutVals[OutVals.size() - 1];
+
+  // 32 extra bytes for the shadow store.
+  // 8 extra bytes to store x3.
+  int64_t ExtraAlloc = 32 + (RetStack ? 8 : 0);
+  SDValue AlignC = DAG.getConstant(0, DL, MVT::i64);
+  SDValue AddC = DAG.getConstant(15 + ExtraAlloc, DL, MVT::i64);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, X5Length, AddC);
+
+  // The dynamic stack allocation will align the size to 16 bytes.
+  // It looks like Microsoft not only aligns the size to 16 bytes, but also
+  // rounds sizes in (-1, -15) down to -16. We don't know why, so for now we
+  // don't implement that part.
+  SDValue Ops[] = {Chain, Add, AlignC};
+  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
+  SDValue Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL, VTs, Ops);
+  unsigned FI = MFI.CreateVariableSizedObject(Align(16), nullptr);
+  Register Reg =
+      MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64));
+  Chain = DAG.getCopyToReg(Buffer.getValue(1), DL, Reg, Buffer.getValue(0));
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getStack(MF, FI);
+  SDValue Ptr = DAG.getObjectPtrOffset(DL, Buffer, TypeSize::Fixed(32));
+
+  // When a varargs function returns its value in a register on AArch64 but
+  // requires an "sret" return on x64, we need to shuffle the argument
+  // registers around and store x3 to the stack.
+  if (RetStack) {
+    Chain = DAG.getStore(Chain, DL, OutVals[5], Ptr, PtrInfo.getWithOffset(32));
+    Ptr = DAG.getObjectPtrOffset(DL, Buffer, TypeSize::Fixed(ExtraAlloc));
+  }
+
+  SDValue Cpy =
+      DAG.getMemcpy(Chain, DL, Ptr, X4Stack, X5Length, Align(8),
+                    /*isVol = */ false, /*AlwaysInline = */ false,
+                    /*isTailCall = */ false, PtrInfo.getWithOffset(ExtraAlloc),
+                    MachinePointerInfo());
+  Chain = Cpy.getValue(1);
+
+  return Chain;
+}
+
 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
 /// and add input and output parameter nodes.
 SDValue
@@ -6839,6 +6900,7 @@
   bool &IsTailCall = CLI.IsTailCall;
   CallingConv::ID &CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
+  bool IsArm64EcThunk = CallConv == CallingConv::ARM64EC_Thunk_X64;
 
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFunction::CallSiteInfo CSInfo;
@@ -6899,6 +6961,11 @@
       report_fatal_error("Passing SVE types to variadic functions is "
                          "currently not supported");
     }
+
+    // A variadic exit thunk only needs the first 5 parameters to lower the
+    // call itself; the last 2 arguments are the stack address and size.
+    if (IsArm64EcThunk)
+      Outs.resize(5);
   }
 
   analyzeCallOperands(*this, Subtarget, CLI, CCInfo);
@@ -6999,6 +7066,9 @@
   SmallVector<SDValue, 8> MemOpChains;
   auto PtrVT = getPointerTy(DAG.getDataLayout());
 
+  if (IsVarArg && IsArm64EcThunk)
+    Chain = varArgCopyForExitThunk(DAG, DL, Chain, OutVals, Outs[1].IsFixed);
+
   if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
     const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
     for (const auto &F : Forwards) {
@@ -7159,6 +7229,28 @@
         if (Options.EmitCallSiteInfo)
           CSInfo.emplace_back(VA.getLocReg(), i);
       }
+
+      if (IsVarArg && IsArm64EcThunk) {
+        // Float parameters are passed in both an integer and a float register.
+        Register ShadowReg;
+        switch (VA.getLocReg()) {
+        case AArch64::X0:
+          ShadowReg = AArch64::D0;
+          break;
+        case AArch64::X1:
+          ShadowReg = AArch64::D1;
+          break;
+        case AArch64::X2:
+          ShadowReg = AArch64::D2;
+          break;
+        case AArch64::X3:
+          ShadowReg = AArch64::D3;
+          break;
+        }
+        if (ShadowReg)
+          RegsToPass.push_back(std::make_pair(
+              ShadowReg, DAG.getRegister(VA.getLocReg(), MVT::i64)));
+      }
     } else {
       assert(VA.isMemLoc());
@@ -7228,7 +7320,7 @@
     }
   }
 
-  if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+  if (IsVarArg && Subtarget->isWindowsArm64EC() && !IsArm64EcThunk) {
     // For vararg calls, the Arm64EC ABI requires values in x4 and x5
     // describing the argument list. x4 contains the address of the
     // first stack parameter. x5 contains the size in bytes of all parameters
Index: llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
+++ llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
@@ -19,6 +19,30 @@
   ret void
 }
 
+define [2 x i64] @f4(ptr %g) {
+entry:
+  %r = call [2 x i64] %g([4 x float] zeroinitializer)
+  ret [2 x i64] %r
+}
+
+define void @fvar(ptr %g) {
+entry:
+  call void (i32, ...) %g(i32 4, i32 5, i32 6, i32 8)
+  ret void
+}
+
+define i32 @fvar2(ptr %g) {
+entry:
+  %r = call i32 (i32, ...) %g(i32 4, i32 5, i32 6, i32 8, i32 7, i32 9)
+  ret i32 %r
+}
+
+define [2 x i64] @fvar3(ptr %g) {
+entry:
+  %r = call [2 x i64] (i32, ...) %g(i32 4, i32 5, i32 6, i32 8, i32 7, i32 9)
+  ret [2 x i64] %r
+}
+
 ; CHECK-LABEL: f:
 ; CHECK: .seh_proc f
 ; CHECK-NEXT: // %bb.0: // %entry
@@ -91,6 +115,128 @@
 ; CHECK-NEXT: .seh_endfunclet
 ; CHECK-NEXT: .seh_endproc
 ;
+; CHECK-LABEL: f4:
+; CHECK: .seh_proc f4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x10, thunk.3 +; CHECK-NEXT: add x10, x10, :lo12:thunk.3 +; CHECK-NEXT: mov x11, x0 +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: movi d2, #0000000000000000 +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: blr x11 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: fvar: +; CHECK: .seh_proc fvar +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x10, thunk.4 +; CHECK-NEXT: add x10, x10, :lo12:thunk.4 +; CHECK-NEXT: mov x11, x0 +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov w1, #5 +; CHECK-NEXT: mov w2, #6 +; CHECK-NEXT: mov w3, #8 +; CHECK-NEXT: mov x4, sp +; CHECK-NEXT: mov x5, xzr +; CHECK-NEXT: blr x11 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg_x x30, 16 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: fvar2: +; CHECK: .seh_proc fvar2 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x10, thunk.5 +; CHECK-NEXT: add x10, x10, :lo12:thunk.5 +; CHECK-NEXT: mov x11, x0 +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: mov x4, sp +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov w1, #5 +; CHECK-NEXT: mov w2, #6 +; CHECK-NEXT: mov w3, #8 +; CHECK-NEXT: mov w5, #16 +; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: str w9, [sp] +; CHECK-NEXT: blr x11 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: fvar3: +; CHECK: .seh_proc fvar3 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_check_icall +; CHECK-NEXT: adrp x10, thunk.6 +; CHECK-NEXT: add x10, x10, :lo12:thunk.6 +; CHECK-NEXT: mov x11, x0 +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_check_icall] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: mov x4, sp +; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w9, #7 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: mov w1, #5 +; CHECK-NEXT: mov w2, #6 +; CHECK-NEXT: mov w3, #8 +; CHECK-NEXT: mov w5, #16 +; CHECK-NEXT: str w8, [sp, #8] +; CHECK-NEXT: str w9, [sp] +; CHECK-NEXT: blr x11 +; CHECK-NEXT: 
.seh_startepilogue +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: .seh_stackalloc 32 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; ; CHECK-LABEL: thunk: ; CHECK: .seh_proc thunk ; CHECK-NEXT: // %bb.0: @@ -164,3 +310,204 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .seh_endfunclet ; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: thunk.3: +; CHECK: .seh_proc thunk.3 +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .seh_add_fp 64 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: add x1, sp, #32 +; CHECK-NEXT: stp s1, s2, [sp, #36] +; CHECK-NEXT: str s0, [sp, #32] +; CHECK-NEXT: ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: str s3, [sp, #44] +; CHECK-NEXT: blr x8 +; CHECK-NEXT: ldp x0, x1, [x29, #-16] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 64 +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: .seh_stackalloc 80 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: thunk.4: +; CHECK: .seh_proc thunk.4 +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: stp x19, x20, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp x25, 32 +; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: add x29, sp, #48 +; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: mov x19, x3 +; CHECK-NEXT: mov x20, x2 +; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: mov x25, x9 +; CHECK-NEXT: ldr x26, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: add x8, x5, #47 +; CHECK-NEXT: lsr x15, x8, #4 +; CHECK-NEXT: bl __chkstk_arm64ec +; CHECK-NEXT: sub x8, sp, x15, lsl #4 +; CHECK-NEXT: add x0, x8, #32 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: mov x1, x4 +; CHECK-NEXT: mov x2, x5 +; CHECK-NEXT: bl "#memcpy" +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: mov x1, x21 +; CHECK-NEXT: mov x2, x20 +; CHECK-NEXT: mov x3, x19 +; CHECK-NEXT: mov x9, x25 +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: fmov d1, x1 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: fmov d3, x3 +; CHECK-NEXT: blr x26 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: sub sp, x29, #48 +; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp x25, 32 +; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: ldp x19, x20, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: thunk.5: +; CHECK: .seh_proc thunk.5 +; CHECK-NEXT: // %bb.0: +; 
CHECK-NEXT: stp x19, x20, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: stp x25, x26, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp x25, 32 +; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: add x29, sp, #48 +; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: mov x19, x3 +; CHECK-NEXT: mov x20, x2 +; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: mov x22, x0 +; CHECK-NEXT: mov x25, x9 +; CHECK-NEXT: ldr x26, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: add x8, x5, #47 +; CHECK-NEXT: lsr x15, x8, #4 +; CHECK-NEXT: bl __chkstk_arm64ec +; CHECK-NEXT: sub x8, sp, x15, lsl #4 +; CHECK-NEXT: add x0, x8, #32 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: mov x1, x4 +; CHECK-NEXT: mov x2, x5 +; CHECK-NEXT: bl "#memcpy" +; CHECK-NEXT: mov x0, x22 +; CHECK-NEXT: mov x1, x21 +; CHECK-NEXT: mov x2, x20 +; CHECK-NEXT: mov x3, x19 +; CHECK-NEXT: mov x9, x25 +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: fmov d1, x1 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: fmov d3, x3 +; CHECK-NEXT: blr x26 +; CHECK-NEXT: mov w0, w8 +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: sub sp, x29, #48 +; CHECK-NEXT: .seh_add_fp 48 +; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 48 +; CHECK-NEXT: ldp x25, x26, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp x25, 32 +; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: ldp x19, x20, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc +; +; CHECK-LABEL: thunk.6: +; CHECK: .seh_proc thunk.6 +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: stp x19, x20, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: stp x21, x22, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: str x25, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .seh_save_reg x25, 32 +; CHECK-NEXT: stp x29, x30, [sp, #40] // 16-byte Folded Spill +; CHECK-NEXT: .seh_save_fplr 40 +; CHECK-NEXT: add x29, sp, #40 +; CHECK-NEXT: .seh_add_fp 40 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .seh_stackalloc 16 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_call_no_redirect +; CHECK-NEXT: mov x19, x2 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x9 +; CHECK-NEXT: ldr x25, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect] +; CHECK-NEXT: add x8, x5, #55 +; CHECK-NEXT: lsr x15, x8, #4 +; CHECK-NEXT: bl __chkstk_arm64ec +; CHECK-NEXT: sub x8, sp, x15, lsl #4 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: add x0, x8, #40 +; CHECK-NEXT: mov x1, x4 +; CHECK-NEXT: mov x2, x5 +; CHECK-NEXT: str x3, [x8, #32] +; CHECK-NEXT: bl "#memcpy" +; CHECK-NEXT: sub x0, x29, #56 +; CHECK-NEXT: mov x1, x21 +; CHECK-NEXT: mov x2, x20 +; CHECK-NEXT: mov x3, x19 +; CHECK-NEXT: mov x9, x22 +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: fmov d1, x1 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: fmov d3, x3 +; CHECK-NEXT: blr x25 +; CHECK-NEXT: ldp x0, x1, [x29, #-56] +; CHECK-NEXT: .seh_startepilogue +; CHECK-NEXT: sub sp, x29, #40 +; CHECK-NEXT: .seh_add_fp 40 +; CHECK-NEXT: ldp x29, x30, [sp, #40] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_fplr 40 +; CHECK-NEXT: ldr x25, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: .seh_save_reg x25, 32 +; CHECK-NEXT: ldp x21, x22, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp x21, 16 +; CHECK-NEXT: ldp x19, x20, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: .seh_save_regp_x x19, 64 +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: ret +; CHECK-NEXT: .seh_endfunclet +; CHECK-NEXT: .seh_endproc
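
Note for reviewers: below is a minimal C++ sketch, not part of the patch, of the runtime contract the generated variadic exit thunk follows, assuming the register roles documented in buildExitThunk above (x9 = callee, x0-x3 = register arguments, x4/x5 = stack-argument descriptor). Every identifier in it (VarArgThunkState, runVarArgExitThunk, Dispatch) is hypothetical.

#include <cstdint>
#include <cstring>
#include <cstdlib>

// Hypothetical model of the state the thunk receives from the caller.
struct VarArgThunkState {
  void *Callee;       // x9: the x64 target resolved by __os_arm64x_check_icall
  uint64_t Regs[4];   // x0-x3: the first four 8-byte argument slots
  void *StackArgs;    // x4: base of the caller's stack-argument area
  uint64_t StackSize; // x5: size of that area in bytes
};

// Models the thunk body: reserve the 32-byte x64 shadow store plus room for
// the stack arguments, copy the caller's stack arguments past the shadow
// store, then hand off to the emulator's dispatch helper.
uint64_t runVarArgExitThunk(const VarArgThunkState &S,
                            uint64_t (*Dispatch)(void *Callee,
                                                 const uint64_t Regs[4],
                                                 void *Frame)) {
  uint64_t Alloc = 32 + S.StackSize;           // shadow store + stack args
  void *Frame = std::malloc(Alloc);            // stands in for DYNAMIC_STACKALLOC
  std::memcpy(static_cast<char *>(Frame) + 32, // skip the shadow store
              S.StackArgs, S.StackSize);
  // The real thunk also mirrors x0-x3 into d0-d3 ("fmov d0, x0", ...) because
  // the x64 varargs convention passes floating-point arguments in both
  // register files; that duplication has no analogue in portable C++.
  uint64_t Ret = Dispatch(S.Callee, S.Regs, Frame);
  std::free(Frame);
  return Ret;
}

The malloc stands in for the __chkstk_arm64ec plus dynamic stack allocation sequence visible in thunk.4 through thunk.6; the real thunk works in its own stack frame and releases it in the epilogue rather than freeing anything explicitly.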
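
A similarly hedged model of the caller side for the fvar2 test: the first four i32 arguments travel in x0-x3, the remaining two are spilled to 8-byte stack slots, and x4/x5 describe that area, matching the mov x4, sp and mov w5, #16 CHECK lines above. The struct and function names are again invented for illustration only.

#include <cstdint>

// Hypothetical model of the Arm64EC varargs call set up by fvar2.
struct Fvar2CallFrame {
  uint64_t X[4];      // x0-x3: the i32 arguments 4, 5, 6, 8
  uint64_t Stack[2];  // two 8-byte caller stack slots holding 7 and 9
  void *X4;           // x4: address of the first stack slot (mov x4, sp)
  uint64_t X5;        // x5: size of the stack area in bytes (mov w5, #16)
};

void initFvar2CallFrame(Fvar2CallFrame &F) {
  F.X[0] = 4; F.X[1] = 5; F.X[2] = 6; F.X[3] = 8;
  F.Stack[0] = 7; F.Stack[1] = 9;
  F.X4 = &F.Stack[0];     // the exit thunk memcpys from here...
  F.X5 = sizeof(F.Stack); // ...for this many bytes (16)
}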