Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -69,6 +69,17 @@ bool enableStackSlotScavenging(const MachineFunction &MF) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + + unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override; + + unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; + + int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + unsigned &FrameReg, + bool IgnoreSPUpdates) const override; + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -115,6 +115,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" @@ -202,6 +203,9 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + // Win64 EH requires a frame pointer if funclets are present. + if (MF.hasEHFunclets()) + return true; // Retain behavior of always omitting the FP for leaf functions when possible. 
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF)) return true; @@ -584,6 +588,7 @@ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); bool HasFP = hasFP(MF); + bool IsFunclet = MBB.isEHFuncletEntry(); // At this point, we're going to decide whether or not the function uses a // redzone. In most cases, the function doesn't have a redzone so let's @@ -604,7 +609,8 @@ if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; - int NumBytes = (int)MFI.getStackSize(); + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); @@ -636,7 +642,10 @@ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + // Var args are accounted for in the containing function, so don't + // include them for funclets. + unsigned FixedObject = (IsWin64 && !IsFunclet) ? + alignTo(AFI->getVarArgsGPRSize(), 16) : 0; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. @@ -663,6 +672,13 @@ fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); ++MBBI; } + + // The code below is not applicable to funclets. We have emitted all the SEH + // opcodes that we needed to emit. The FP and BP belong to the containing + // function. + if (IsFunclet) + return; + if (HasFP) { // Only set up FP if we actually need to. Frame pointer is fp = // sp - fixedobject - 16. 
@@ -890,6 +906,16 @@ } } +static bool isFuncletReturnInstr(MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case AArch64::CATCHRET: + case AArch64::CLEANUPRET: + return true; + } +} + void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); @@ -898,14 +924,16 @@ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; + bool IsFunclet = isFuncletReturnInstr(*MBBI); if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } - int NumBytes = MFI.getStackSize(); - const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : MFI.getStackSize(); + AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -962,9 +990,12 @@ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; - uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; + // Var args are accounted for in the containing function, so don't + // include them for funclets. + if (MF.hasEHFunclets()) + AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); // Assume we can't combine the last pop with the sp restore. @@ -1144,6 +1175,11 @@ // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. 
UseFP = true; + } else if (MF.hasEHFunclets()) { + assert( + Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) && + "Funclets should only be present on Win64"); + UseFP = true; } else { // We have the choice between FP and (SP or BP). if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. @@ -1584,3 +1620,70 @@ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); return AFI->hasCalleeSaveStackFreeSpace(); } + +void AArch64FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // Mark the function as not having WinCFI. We will set it back to true in + // emitPrologue if it gets called and emits CFI. + MF.setHasWinCFI(false); + + // If this function isn't doing Win64-style C++ EH, we don't need to do + // anything. + if (!MF.hasEHFunclets()) + return; + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); + + // Create an UnwindHelp object. + int UnwindHelpFI = + MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + + MachineBasicBlock &MBB = MF.front(); + auto MBBI = MBB.begin(); + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + if (MBBI->isTerminator()) + return; + // We need to store -2 into the UnwindHelp object at the start of the + // function. + DebugLoc DL = MBB.findDebugLoc(MBBI); + RS->enterBasicBlock(MBB); + unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) + .addReg(DstReg, getKillRegState(true)) + .addFrameIndex(UnwindHelpFI) + .addImm(0); +} + +// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before +// the update. This is easily retrieved as it is exactly the offset that is set +// in processFunctionBeforeFrameFinalized. 
+int AArch64FrameLowering::getFrameIndexReferencePreferSP( + const MachineFunction &MF, int FI, unsigned &FrameReg, + bool IgnoreSPUpdates) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + LLVM_DEBUG(dbgs() << "Offset from SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); +} + +// Patch in zero for now. Haven't encountered any problems yet. +unsigned AArch64FrameLowering::getWinEHParentFrameOffset( + const MachineFunction &MF) const { + return 0; +} + +unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( + const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize(); + // This is the amount of stack a funclet needs to allocate. + return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), + getStackAlignment()); +} Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -302,6 +302,12 @@ MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -516,6 +522,8 @@ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + /// Used for exception handling on Win64. + bool needsFixedCatchObjects() const override; private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. 
Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1273,6 +1273,22 @@ return EndBB; } +MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( + MachineInstr &MI, MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF->getFunction().getPersonalityFn())) && + "SEH does not use catchret!"); + return BB; +} + +MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad( + MachineInstr &MI, MachineBasicBlock *BB) const { + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { @@ -1288,6 +1304,11 @@ case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + + case AArch64::CATCHRET: + return EmitLoweredCatchRet(MI, BB); + case AArch64::CATCHPAD: + return EmitLoweredCatchPad(MI, BB); } } @@ -11578,3 +11599,8 @@ MF.getFrameInfo().computeMaxCallFrameSize(MF); TargetLoweringBase::finalizeLowering(MF); } + +// Unlike X86, we let frame lowering assign offsets to all catch objects. 
+bool AArch64TargetLowering::needsFixedCatchObjects() const { + return false; +} Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -66,7 +66,8 @@ cl::desc("Restrict range of Bcc instructions (DEBUG)")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, + AArch64::CATCHRET), RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified @@ -1593,11 +1594,36 @@ } bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD) + if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && + MI.getOpcode() != AArch64::CATCHRET) return false; MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); + + if (MI.getOpcode() == AArch64::CATCHRET) { + // Skip to the first instruction before the epilog. 
+ const TargetInstrInfo *TII = + MBB.getParent()->getSubtarget().getInstrInfo(); + MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); + auto MBBI = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI); + while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) && + FirstEpilogSEH != MBB.begin()) + FirstEpilogSEH = std::prev(FirstEpilogSEH); + if (FirstEpilogSEH != MBB.begin()) + FirstEpilogSEH = std::next(FirstEpilogSEH); + BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP)) + .addReg(AArch64::X0) + .addMBB(TargetMBB); + BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri)) + .addReg(AArch64::X0) + .addReg(AArch64::X0) + .addMBB(TargetMBB) + .addImm(0); + return true; + } + unsigned Reg = MI.getOperand(0).getReg(); const GlobalValue *GV = cast<GlobalValue>((*MI.memoperands_begin())->getValue()); Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -3014,6 +3014,20 @@ let hasNoSchedulingInfo = 1; } +//===----------------------------------------------------------------------===// +// Pseudo instructions for Windows EH +//===----------------------------------------------------------------------===// +let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, + isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in { + def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; + let usesCustomInserter = 1 in + def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, + Sched<[]>; +} + +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in +def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]> //===----------------------------------------------------------------------===// // Floating point immediate move. 
Index: lib/Target/AArch64/AArch64MCInstLower.cpp =================================================================== --- lib/Target/AArch64/AArch64MCInstLower.cpp +++ lib/Target/AArch64/AArch64MCInstLower.cpp @@ -253,4 +253,17 @@ if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } + + switch (OutMI.getOpcode()) { + case AArch64::CATCHRET: + OutMI = MCInst(); + OutMI.setOpcode(AArch64::RET); + OutMI.addOperand(MCOperand::createReg(AArch64::LR)); + break; + case AArch64::CLEANUPRET: + OutMI = MCInst(); + OutMI.setOpcode(AArch64::RET); + OutMI.addOperand(MCOperand::createReg(AArch64::LR)); + break; + } } Index: lib/Target/AArch64/AArch64RegisterInfo.h =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.h +++ lib/Target/AArch64/AArch64RegisterInfo.h @@ -54,6 +54,9 @@ // normal calls, so they need a different mask to represent this. const uint32_t *getTLSCallPreservedMask() const; + // Funclets on ARM64 Windows don't preserve any registers. 
+ const uint32_t *getNoPreservedMask() const override; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i64 first argument if the calling convention /// is one that can (partially) model this attribute with a preserved mask Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -117,6 +117,10 @@ return CSR_AArch64_TLS_ELF_RegMask; } +const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const { + return CSR_AArch64_NoRegs_RegMask; +} + const uint32_t * AArch64RegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { Index: test/CodeGen/AArch64/wineh-funclets.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/wineh-funclets.ll @@ -0,0 +1,83 @@ +; RUN: llc -o - %s -mtriple=aarch64-windows -stop-after=prologepilog | FileCheck %s +; This test case just checks that we can get past catchret, etc, and not the +; validity of the exception handling tables. It also checks that we generate the +; required store of -2 into the catch object, whose offset from the stack pointer +; will be patched into the tables by WinException. Other test cases will test +; the validity of these tables. 
+; CHECK-LABEL: bb.0.entry +; CHECK: $x2 = MOVi64imm -2 +; CHECK: STURXi killed $x2, $fp, -32 +; CHECK: BL @_CxxThrowException +; CHECK-LABEL: bb.1.catch (landing-pad) +; CHECK: CATCHRET %bb.2, %bb.0 +; CHECK-LABEL: bb.2.catchret.dest: +target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-unknown-windows-msvc19.10.25008" + +%rtti.TypeDescriptor2 = type { i8**, i8*, [3 x i8] } +%eh.CatchableType = type { i32, i32, i32, i32, i32, i32, i32 } +%eh.CatchableTypeArray.1 = type { i32, [1 x i32] } +%eh.ThrowInfo = type { i32, i32, i32, i32 } + +$"??_R0H@8" = comdat any + +$"_CT??_R0H@84" = comdat any + +$_CTA1H = comdat any + +$_TI1H = comdat any + +@"??_7type_info@@6B@" = external constant i8* +@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { i8** @"??_7type_info@@6B@", i8* null, [3 x i8] c".H\00" }, comdat +@__ImageBase = external dso_local constant i8 +@"_CT??_R0H@84" = linkonce_odr unnamed_addr constant %eh.CatchableType { i32 1, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%rtti.TypeDescriptor2* @"??_R0H@8" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32), i32 0, i32 -1, i32 0, i32 4, i32 0 }, section ".xdata", comdat +@_CTA1H = linkonce_odr unnamed_addr constant %eh.CatchableTypeArray.1 { i32 1, [1 x i32] [i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableType* @"_CT??_R0H@84" to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32)] }, section ".xdata", comdat +@_TI1H = linkonce_odr unnamed_addr constant %eh.ThrowInfo { i32 0, i32 0, i32 0, i32 trunc (i64 sub nuw nsw (i64 ptrtoint (%eh.CatchableTypeArray.1* @_CTA1H to i64), i64 ptrtoint (i8* @__ImageBase to i64)) to i32) }, section ".xdata", comdat + +; Function Attrs: noinline norecurse optnone +define dso_local i32 @main() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { +entry: + %retval = alloca i32, align 4 + %tmp = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval, align 4 + store i32 42, i32* %tmp, 
align 4 + %0 = bitcast i32* %tmp to i8* + invoke void @_CxxThrowException(i8* %0, %eh.ThrowInfo* @_TI1H) #1 + to label %unreachable unwind label %catch.dispatch + +catch.dispatch: ; preds = %entry + %1 = catchswitch within none [label %catch] unwind to caller + +catch: ; preds = %catch.dispatch + %2 = catchpad within %1 [%rtti.TypeDescriptor2* @"??_R0H@8", i32 0, i32* %i] + store i32 43, i32* %retval, align 4 + catchret from %2 to label %catchret.dest + +catchret.dest: ; preds = %catch + br label %return + +try.cont: ; No predecessors! + store i32 0, i32* %retval, align 4 + br label %return + +return: ; preds = %try.cont, %catchret.dest + %3 = load i32, i32* %retval, align 4 + ret i32 %3 + +unreachable: ; preds = %entry + unreachable +} + +declare dso_local void @_CxxThrowException(i8*, %eh.ThrowInfo*) + +declare dso_local i32 @__CxxFrameHandler3(...) + +attributes #0 = { noinline norecurse optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noreturn } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 2} +!1 = !{!"clang version 8.0.0 (http://llvm.org/git/clang.git b4ce07ad3aea023c81f4f6dc8119b56328ae1da7) (http://llvm.org/git/llvm.git 34ec5a2e299dd5c91c5525878afcdcb99b6b3db1)"}