Index: llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -59,18 +59,43 @@
 } // end anonymous namespace
 
 Function *AArch64Arm64ECCallLowering::buildExitThunk(CallBase *CB) {
-  Type *RetTy = CB->getFunctionType()->getReturnType();
+  auto &DL = M->getDataLayout();
+  FunctionType *FT = CB->getFunctionType();
+  Type *RetTy = FT->getReturnType();
+  bool IsVarArg = FT->isVarArg();
+  Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
+  Type *I64Ty = Type::getInt64Ty(M->getContext());
+
   SmallVector<Type *> DefArgTypes;
   // The first argument to a thunk is the called function, stored in x9.
   // (Normally, we won't explicitly refer to this in the assembly; it just
   // gets passed on by the call.)
-  DefArgTypes.push_back(Type::getInt8PtrTy(M->getContext()));
-  for (unsigned i = 0; i < CB->arg_size(); ++i) {
-    DefArgTypes.push_back(CB->getArgOperand(i)->getType());
+  DefArgTypes.push_back(I8PtrTy);
+
+  if (IsVarArg) {
+    // We treat the variadic function's exit thunk as a normal function
+    // with type:
+    //   rettype exitthunk(
+    //       i8* x9, i64 x0, i64 x1, i64 x2, i64 x3, i8* x4, i64 x5)
+    // which can cover all variadic function types.
+    // x9 is the same as in a normal exit thunk: it holds the called function.
+    // x0-x3 are the arguments passed in registers.
+    // x4 is the address of the arguments on the stack.
+    // x5 is the size of the arguments on the stack.
+    for (int i = 0; i < 4; i++)
+      DefArgTypes.push_back(I64Ty);
+
+    DefArgTypes.push_back(I8PtrTy);
+    DefArgTypes.push_back(I64Ty);
+  } else {
+    for (unsigned i = 0; i < CB->arg_size(); ++i) {
+      DefArgTypes.push_back(CB->getArgOperand(i)->getType());
+    }
   }
+
   FunctionType *Ty = FunctionType::get(RetTy, DefArgTypes, false);
   Function *F =
-      Function::Create(Ty, GlobalValue::InternalLinkage, 0, "thunk", M);
+      Function::Create(Ty, GlobalValue::InternalLinkage, 0, "exit_thunk", M);
   F->setCallingConv(CallingConv::ARM64EC_Thunk_Native);
   // Copy MSVC, and always set up a frame pointer. (Maybe this isn't necessary.)
   F->addFnAttr("frame-pointer", "all");
@@ -86,11 +111,6 @@
   // C ABI, but might show up in other cases.
   BasicBlock *BB = BasicBlock::Create(M->getContext(), "", F);
   IRBuilder<> IRB(BB);
-  PointerType *DispatchPtrTy = FunctionType::get(IRB.getVoidTy(), false)->getPointerTo(0);
-  Value *CalleePtr = M->getOrInsertGlobal(
-      "__os_arm64x_dispatch_call_no_redirect", DispatchPtrTy);
-  Value *Callee = IRB.CreateLoad(DispatchPtrTy, CalleePtr);
-  auto &DL = M->getDataLayout();
 
   SmallVector<Value *> Args;
   SmallVector<Type *> ArgTypes;
@@ -98,6 +118,7 @@
   Args.push_back(F->arg_begin());
   ArgTypes.push_back(Args.back()->getType());
 
+  bool RetStack = false;
   Type *X64RetType = RetTy;
   if (RetTy->isArrayTy() || RetTy->isStructTy()) {
     // If the return type is an array or struct, translate it. Values of size
@@ -107,12 +128,18 @@
       Args.push_back(IRB.CreateAlloca(RetTy));
       ArgTypes.push_back(Args.back()->getType());
       X64RetType = IRB.getVoidTy();
+      RetStack = true;
     } else {
       X64RetType = IRB.getIntNTy(DL.getTypeStoreSizeInBits(RetTy));
     }
   }
 
-  for (auto &Arg : make_range(F->arg_begin() + 1, F->arg_end())) {
+  // If the called function is variadic, we can't pass x4 (stack pointer) and
+  // x5 (stack size) via the function type; they are part of the calling conv.
+  auto ArgRange =
+      make_range(F->arg_begin() + 1,
+                 IsVarArg ? F->arg_end() - (RetStack ? 3 : 2) : F->arg_end());
+  for (auto &Arg : ArgRange) {
     // Translate arguments from AArch64 calling convention to x86 calling
    // convention.
     //
@@ -138,13 +165,52 @@
     } else {
       Args.push_back(&Arg);
     }
-    ArgTypes.push_back(Args.back()->getType());
+    if (!IsVarArg)
+      ArgTypes.push_back(Args.back()->getType());
+  }
+
+  if (IsVarArg) {
+    // Memory address of the arguments on the stack.
+    Value *Src = F->arg_begin() + 5;
+    // Size of the arguments on the stack.
+    Value *SrcLength = F->arg_begin() + 6;
+
+    // Align the size to 16 bytes.
+    // It looks like Microsoft not only aligns the size to 16 bytes,
+    // but also aligns (-1,-15) to -16. We don't know why, so for
+    // now we don't add this part.
+    Constant *AddC = ConstantInt::get(I64Ty, 15 + (RetStack ? 8 : 0));
+    Constant *NegC = ConstantInt::get(I64Ty, -16ll);
+    Value *Add = IRB.CreateAdd(SrcLength, AddC);
+    Value *Length = IRB.CreateAnd(Add, NegC);
+
+    // FIXME: The allocation should be at the bottom of the stack.
+    // For now the code here assumes we have no other dynamic
+    // allocation after the alloca inst. That assumption is fragile,
+    // so we need to use another way to allocate it to make sure it
+    // is at the bottom of the stack.
+    Type *I8Ty = Type::getInt8Ty(M->getContext());
+    AllocaInst *AI = IRB.CreateAlloca(I8Ty, Length);
+    AI->setAlignment(DL.getPrefTypeAlign(I64Ty));
+    Value *Dst = AI;
+    if (RetStack) {
+      IRB.CreateStore(F->arg_begin() + 4, AI);
+      Dst = IRB.CreateGEP(I8Ty, AI, ConstantInt::get(I64Ty, 8));
+    }
+    IRB.CreateMemCpy(Dst, Dst->getPointerAlignment(DL), Src,
+                     Src->getPointerAlignment(DL), SrcLength);
   }
+
   // FIXME: Transfer necessary attributes? sret? anything else?
   // FIXME: Try to share thunks. This probably involves simplifying the
   // argument types (translating all integers/pointers to i64, etc.)
-  auto *CallTy = FunctionType::get(X64RetType, ArgTypes, false);
+  auto *CallTy = FunctionType::get(X64RetType, ArgTypes, IsVarArg);
+
+  PointerType *DispatchPtrTy =
+      FunctionType::get(IRB.getVoidTy(), false)->getPointerTo(0);
+  Value *CalleePtr = M->getOrInsertGlobal(
+      "__os_arm64x_dispatch_call_no_redirect", DispatchPtrTy);
+  Value *Callee = IRB.CreateLoad(DispatchPtrTy, CalleePtr);
   Callee = IRB.CreateBitCast(Callee, CallTy->getPointerTo(0));
   CallInst *Call = IRB.CreateCall(CallTy, Callee, Args);
   Call->setCallingConv(CallingConv::ARM64EC_Thunk_X64);
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1430,6 +1430,13 @@
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 
   IsStrictFPEnabled = true;
+
+  if (Subtarget->isWindowsArm64EC()) {
+    // FIXME: are there other intrinsics we need to add here?
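+    // Arm64EC mangles native AArch64 function symbols with a leading "#", so
+    // these redirects keep compiler-generated memcpy/memset/memmove libcalls
+    // pointing at the native implementations.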
+    setLibcallName(RTLIB::MEMCPY, "#memcpy");
+    setLibcallName(RTLIB::MEMSET, "#memset");
+    setLibcallName(RTLIB::MEMMOVE, "#memmove");
+  }
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -6553,6 +6560,7 @@
   bool &IsTailCall = CLI.IsTailCall;
   CallingConv::ID &CallConv = CLI.CallConv;
   bool IsVarArg = CLI.IsVarArg;
+  bool IsArm64EcThunk = CallConv == CallingConv::ARM64EC_Thunk_X64;
 
   MachineFunction &MF = DAG.getMachineFunction();
   MachineFunction::CallSiteInfo CSInfo;
@@ -6826,6 +6834,27 @@
         if (Options.EmitCallSiteInfo)
           CSInfo.emplace_back(VA.getLocReg(), i);
       }
+
+      if (IsVarArg && IsArm64EcThunk) {
+        // Float parameters are passed in both int and float registers.
+        Register ShadowReg;
+        switch (VA.getLocReg()) {
+        case AArch64::X0:
+          ShadowReg = AArch64::D0;
+          break;
+        case AArch64::X1:
+          ShadowReg = AArch64::D1;
+          break;
+        case AArch64::X2:
+          ShadowReg = AArch64::D2;
+          break;
+        case AArch64::X3:
+          ShadowReg = AArch64::D3;
+          break;
+        }
+        if (ShadowReg)
+          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
+      }
     } else {
       assert(VA.isMemLoc());
@@ -6895,7 +6924,7 @@
     }
   }
 
-  if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+  if (IsVarArg && Subtarget->isWindowsArm64EC() && !IsArm64EcThunk) {
     // For vararg calls, the Arm64EC ABI requires values in x4 and x5
     // describing the argument list. x4 contains the address of the
     // first stack parameter. x5 contains the size in bytes of all parameters
Index: llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
===================================================================
--- llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
+++ llvm/test/CodeGen/AArch64/arm64ec-cfg.ll
@@ -50,8 +50,8 @@
 ; CHECK-NEXT:    .seh_save_reg_x x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk
-; CHECK-NEXT:    add x10, x10, :lo12:thunk
+; CHECK-NEXT:    adrp x10, exit_thunk
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -71,8 +71,8 @@
 ; CHECK-NEXT:    .seh_save_reg_x x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.1
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.1
+; CHECK-NEXT:    adrp x10, exit_thunk.1
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.1
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -97,8 +97,8 @@
 ; CHECK-NEXT:    .seh_save_reg_x x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.2
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.2
+; CHECK-NEXT:    adrp x10, exit_thunk.2
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.2
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -122,8 +122,8 @@
 ; CHECK-NEXT:    .seh_save_reg_x x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.3
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.3
+; CHECK-NEXT:    adrp x10, exit_thunk.3
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.3
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -147,8 +147,8 @@
 ; CHECK-NEXT:    .seh_save_reg_x x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.4
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.4
+; CHECK-NEXT:    adrp x10, exit_thunk.4
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.4
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -176,8 +176,8 @@
 ; CHECK-NEXT:    .seh_save_reg x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.5
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.5
+; CHECK-NEXT:    adrp x10, exit_thunk.5
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.5
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -211,8 +211,8 @@
 ; CHECK-NEXT:    .seh_save_reg x30, 16
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_check_icall
-; CHECK-NEXT:    adrp x10, thunk.6
-; CHECK-NEXT:    add x10, x10, :lo12:thunk.6
+; CHECK-NEXT:    adrp x10, exit_thunk.6
+; CHECK-NEXT:    add x10, x10, :lo12:exit_thunk.6
 ; CHECK-NEXT:    mov x11, x0
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_check_icall]
 ; CHECK-NEXT:    blr x8
@@ -237,8 +237,8 @@
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk:
-; CHECK: .seh_proc thunk
+; CHECK-LABEL: exit_thunk:
+; CHECK: .seh_proc exit_thunk
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #48
 ; CHECK-NEXT:    .seh_stackalloc 48
@@ -260,8 +260,8 @@
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.1:
-; CHECK: .seh_proc thunk.1
+; CHECK-LABEL: exit_thunk.1:
+; CHECK: .seh_proc exit_thunk.1
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
 ; CHECK-NEXT:    .seh_stackalloc 64
@@ -284,8 +284,8 @@
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.2:
-; CHECK: .seh_proc thunk.2
+; CHECK-LABEL: exit_thunk.2:
+; CHECK: .seh_proc exit_thunk.2
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #64
 ; CHECK-NEXT:    .seh_stackalloc 64
@@ -296,10 +296,9 @@
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_dispatch_call_no_redirect
 ; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    stp s1, s2, [x29, #-12]
-; CHECK-NEXT:    stur s0, [x29, #-16]
+; CHECK-NEXT:    stp s2, s3, [x29, #-8]
+; CHECK-NEXT:    stp s0, s1, [x29, #-16]
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT:    stur s3, [x29, #-4]
 ; CHECK-NEXT:    blr x8
 ; CHECK-NEXT:    .seh_startepilogue
 ; CHECK-NEXT:    ldp x29, x30, [sp, #48] // 16-byte Folded Reload
@@ -311,8 +310,8 @@
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.3:
-; CHECK: .seh_proc thunk.3
+; CHECK-LABEL: exit_thunk.3:
+; CHECK: .seh_proc exit_thunk.3
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT:    sub sp, sp, #80
 ; CHECK-NEXT:    .seh_stackalloc 80
@@ -324,10 +323,9 @@
 ; CHECK-NEXT:    .seh_endprologue
 ; CHECK-NEXT:    adrp x8, __os_arm64x_dispatch_call_no_redirect
 ; CHECK-NEXT:    sub x0, x29, #16
 ; CHECK-NEXT:    add x1, sp, #32
-; CHECK-NEXT:    stp s1, s2, [sp, #36]
-; CHECK-NEXT:    str s0, [sp, #32]
+; CHECK-NEXT:    stp s2, s3, [sp, #40]
+; CHECK-NEXT:    stp s0, s1, [sp, #32]
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT:    str s3, [sp, #44]
 ; CHECK-NEXT:    blr x8
 ; CHECK-NEXT:    ldp x0, x1, [x29, #-16]
 ; CHECK-NEXT:    .seh_startepilogue
@@ -340,82 +338,176 @@
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.4:
-; CHECK: .seh_proc thunk.4
+; CHECK-LABEL: exit_thunk.4:
+; CHECK: .seh_proc exit_thunk.4
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #48
-; CHECK-NEXT:    .seh_stackalloc 48
-; CHECK-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT:    .seh_save_fplr 32
-; CHECK-NEXT:    add x29, sp, #32
-; CHECK-NEXT:    .seh_add_fp 32
+; CHECK-NEXT:    stp x19, x20, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp_x x19, 64
+; CHECK-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    str x25, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg x25, 32
+; CHECK-NEXT:    stp x29, x30, [sp, #40] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_fplr 40
+; CHECK-NEXT:    add x29, sp, #40
+; CHECK-NEXT:    .seh_add_fp 40
 ; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    add x8, x5, #15
+; CHECK-NEXT:    mov x19, x3
+; CHECK-NEXT:    lsr x15, x8, #4
+; CHECK-NEXT:    mov x20, x2
+; CHECK-NEXT:    mov x21, x1
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:    mov x25, x9
+; CHECK-NEXT:    bl __chkstk_arm64ec
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    sub x0, x8, x15, lsl #4
+; CHECK-NEXT:    mov sp, x0
+; CHECK-NEXT:    mov x1, x4
+; CHECK-NEXT:    mov x2, x5
+; CHECK-NEXT:    bl "#memcpy"
 ; CHECK-NEXT:    adrp x8, __os_arm64x_dispatch_call_no_redirect
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    mov x9, x25
+; CHECK-NEXT:    mov x0, x22
+; CHECK-NEXT:    fmov d0, x22
+; CHECK-NEXT:    mov x1, x21
+; CHECK-NEXT:    fmov d1, x21
+; CHECK-NEXT:    mov x2, x20
+; CHECK-NEXT:    fmov d2, x20
+; CHECK-NEXT:    mov x3, x19
+; CHECK-NEXT:    fmov d3, x19
 ; CHECK-NEXT:    blr x8
+; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    .seh_startepilogue
-; CHECK-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT:    .seh_save_fplr 32
-; CHECK-NEXT:    add sp, sp, #48
-; CHECK-NEXT:    .seh_stackalloc 48
+; CHECK-NEXT:    sub sp, x29, #40
+; CHECK-NEXT:    .seh_add_fp 40
+; CHECK-NEXT:    ldp x29, x30, [sp, #40] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_fplr 40
+; CHECK-NEXT:    ldr x25, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg x25, 32
+; CHECK-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    ldp x19, x20, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp_x x19, 64
 ; CHECK-NEXT:    .seh_endepilogue
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.5:
-; CHECK: .seh_proc thunk.5
+; CHECK-LABEL: exit_thunk.5:
+; CHECK: .seh_proc exit_thunk.5
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #64
-; CHECK-NEXT:    .seh_stackalloc 64
-; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT:    .seh_save_fplr 48
-; CHECK-NEXT:    add x29, sp, #48
-; CHECK-NEXT:    .seh_add_fp 48
+; CHECK-NEXT:    stp x19, x20, [sp, #-64]! // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp_x x19, 64
+; CHECK-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    str x25, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    .seh_save_reg x25, 32
+; CHECK-NEXT:    stp x29, x30, [sp, #40] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_fplr 40
+; CHECK-NEXT:    add x29, sp, #40
+; CHECK-NEXT:    .seh_add_fp 40
 ; CHECK-NEXT:    .seh_endprologue
+; CHECK-NEXT:    add x8, x5, #15
+; CHECK-NEXT:    mov x19, x3
+; CHECK-NEXT:    lsr x15, x8, #4
+; CHECK-NEXT:    mov x20, x2
+; CHECK-NEXT:    mov x21, x1
+; CHECK-NEXT:    mov x22, x0
+; CHECK-NEXT:    mov x25, x9
+; CHECK-NEXT:    bl __chkstk_arm64ec
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    sub x0, x8, x15, lsl #4
+; CHECK-NEXT:    mov sp, x0
+; CHECK-NEXT:    mov x1, x4
+; CHECK-NEXT:    mov x2, x5
+; CHECK-NEXT:    bl "#memcpy"
 ; CHECK-NEXT:    adrp x8, __os_arm64x_dispatch_call_no_redirect
-; CHECK-NEXT:    str w5, [sp, #40]
-; CHECK-NEXT:    str w4, [sp, #32]
 ; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    mov x9, x25
+; CHECK-NEXT:    mov x0, x22
+; CHECK-NEXT:    fmov d0, x22
+; CHECK-NEXT:    mov x1, x21
+; CHECK-NEXT:    fmov d1, x21
+; CHECK-NEXT:    mov x2, x20
+; CHECK-NEXT:    fmov d2, x20
+; CHECK-NEXT:    mov x3, x19
+; CHECK-NEXT:    fmov d3, x19
 ; CHECK-NEXT:    blr x8
+; CHECK-NEXT:    add sp, sp, #32
 ; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    .seh_startepilogue
-; CHECK-NEXT:    ldp x29, x30, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT:    .seh_save_fplr 48
-; CHECK-NEXT:    add sp, sp, #64
-; CHECK-NEXT:    .seh_stackalloc 64
+; CHECK-NEXT:    sub sp, x29, #40
+; CHECK-NEXT:    .seh_add_fp 40
+; CHECK-NEXT:    ldp x29, x30, [sp, #40] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_fplr 40
+; CHECK-NEXT:    ldr x25, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    .seh_save_reg x25, 32
+; CHECK-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    ldp x19, x20, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp_x x19, 64
 ; CHECK-NEXT:    .seh_endepilogue
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .seh_endfunclet
 ; CHECK-NEXT:    .seh_endproc
 ;
-; CHECK-LABEL: thunk.6:
-; CHECK: .seh_proc thunk.6
+; CHECK-LABEL: exit_thunk.6:
+; CHECK: .seh_proc exit_thunk.6
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #96
-; CHECK-NEXT:    .seh_stackalloc 96
-; CHECK-NEXT:    stp x29, x30, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT:    .seh_save_fplr 80
-; CHECK-NEXT:    add x29, sp, #80
-; CHECK-NEXT:    .seh_add_fp 80
+; CHECK-NEXT:    stp x19, x20, [sp, #-48]! // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp_x x19, 48
+; CHECK-NEXT:    stp x21, x22, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    stp x29, x30, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .seh_save_fplr 32
+; CHECK-NEXT:    add x29, sp, #32
+; CHECK-NEXT:    .seh_add_fp 32
+; CHECK-NEXT:    sub sp, sp, #16
+; CHECK-NEXT:    .seh_stackalloc 16
 ; CHECK-NEXT:    .seh_endprologue
-; CHECK-NEXT:    adrp x10, __os_arm64x_dispatch_call_no_redirect
-; CHECK-NEXT:    mov w8, w3
-; CHECK-NEXT:    mov w3, w2
-; CHECK-NEXT:    mov w2, w1
-; CHECK-NEXT:    mov w1, w0
-; CHECK-NEXT:    sub x0, x29, #16
-; CHECK-NEXT:    ldr x10, [x10, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT:    str w5, [sp, #48]
-; CHECK-NEXT:    str w4, [sp, #40]
-; CHECK-NEXT:    str w8, [sp, #32]
-; CHECK-NEXT:    blr x10
-; CHECK-NEXT:    ldp x0, x1, [x29, #-16]
+; CHECK-NEXT:    add x8, x5, #31
+; CHECK-NEXT:    mov x19, x2
+; CHECK-NEXT:    lsr x15, x8, #4
+; CHECK-NEXT:    mov x20, x1
+; CHECK-NEXT:    mov x21, x0
+; CHECK-NEXT:    mov x22, x9
+; CHECK-NEXT:    bl __chkstk_arm64ec
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    sub x0, x8, x15, lsl #4
+; CHECK-NEXT:    mov sp, x0
+; CHECK-NEXT:    mov x1, x4
+; CHECK-NEXT:    mov x2, x5
+; CHECK-NEXT:    str x3, [x0], #8
+; CHECK-NEXT:    bl "#memcpy"
+; CHECK-NEXT:    adrp x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT:    ldr x8, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    sub x10, x29, #48
+; CHECK-NEXT:    sub x0, x29, #48
+; CHECK-NEXT:    mov x9, x22
+; CHECK-NEXT:    mov x1, x21
+; CHECK-NEXT:    fmov d1, x21
+; CHECK-NEXT:    mov x2, x20
+; CHECK-NEXT:    fmov d2, x20
+; CHECK-NEXT:    fmov d0, x10
+; CHECK-NEXT:    mov x3, x19
+; CHECK-NEXT:    fmov d3, x19
+; CHECK-NEXT:    blr x8
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ldp x0, x1, [x29, #-48]
 ; CHECK-NEXT:    .seh_startepilogue
-; CHECK-NEXT:    ldp x29, x30, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT:    .seh_save_fplr 80
-; CHECK-NEXT:    add sp, sp, #96
-; CHECK-NEXT:    .seh_stackalloc 96
+; CHECK-NEXT:    sub sp, x29, #32
+; CHECK-NEXT:    .seh_add_fp 32
+; CHECK-NEXT:    ldp x29, x30, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_fplr 32
+; CHECK-NEXT:    ldp x21, x22, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp x21, 16
+; CHECK-NEXT:    ldp x19, x20, [sp], #48 // 16-byte Folded Reload
+; CHECK-NEXT:    .seh_save_regp_x x19, 48
 ; CHECK-NEXT:    .seh_endepilogue
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:    .seh_endfunclet
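
For reference, here is a minimal IR sketch (not part of the patch; the function and value names are illustrative only) of the kind of call that exercises the new variadic exit-thunk path. Compiled for an arm64ec-pc-windows-msvc triple, an indirect variadic call like the one below goes through __os_arm64x_check_icall and, when the target turns out to be x64 code, through a variadic exit thunk such as exit_thunk.4 above, which forwards x0-x3 (shadowed in d0-d3) and memcpy's the stack area described by x4/x5:

; Illustrative only; not taken from arm64ec-cfg.ll.
declare void @vararg_fn(i8*, ...)

define void @call_vararg(void (i8*, ...)* %fp, i8* %str, i32 %a, double %b) {
entry:
  call void (i8*, ...) %fp(i8* %str, i32 %a, double %b)
  ret void
}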