Index: lib/CodeGen/AsmPrinter/WinException.h =================================================================== --- lib/CodeGen/AsmPrinter/WinException.h +++ lib/CodeGen/AsmPrinter/WinException.h @@ -72,6 +72,7 @@ const MCExpr *create32bitRef(const MCSymbol *Value); const MCExpr *create32bitRef(const GlobalValue *GV); + const MCExpr *getLabel(const MCSymbol *Label); const MCExpr *getLabelPlusOne(const MCSymbol *Label); const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, Index: lib/CodeGen/AsmPrinter/WinException.cpp =================================================================== --- lib/CodeGen/AsmPrinter/WinException.cpp +++ lib/CodeGen/AsmPrinter/WinException.cpp @@ -235,6 +235,9 @@ if (!CurrentFuncletEntry) return; + bool isAArch64 = Asm->MF->getTarget().getTargetTriple().getArch() == + Triple::ArchType::aarch64; + const MachineFunction *MF = Asm->MF; if (shouldEmitMoves || shouldEmitPersonality) { const Function &F = MF->getFunction(); @@ -242,6 +245,16 @@ if (F.hasPersonalityFn()) Per = classifyEHPersonality(F.getPersonalityFn()->stripPointerCasts()); + // On funclet exit, we emit a fake "function" end marker, so that we can + // calculate the size of the funclet later on in MC Layer. + if (isAArch64) { + Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection); + Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd(); + MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection( + Asm->OutStreamer->getCurrentSectionOnly()); + Asm->OutStreamer->SwitchSection(XData); + } + // Emit an UNWIND_INFO struct describing the prologue. 
Asm->OutStreamer->EmitWinEHHandlerData(); @@ -286,6 +299,11 @@ return create32bitRef(Asm->getSymbol(GV)); } +const MCExpr *WinException::getLabel(const MCSymbol *Label) { + return MCSymbolRefExpr::create(Label, MCSymbolRefExpr::VK_COFF_IMGREL32, + Asm->OutContext); +} + const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { return MCBinaryExpr::createAdd(create32bitRef(Label), MCConstantExpr::create(1, Asm->OutContext), @@ -588,6 +606,8 @@ const MCSymbol *EndLabel, int State) { auto &OS = *Asm->OutStreamer; MCContext &Ctx = Asm->OutContext; + bool isAArch64 = Asm->MF->getTarget().getTargetTriple().getArch() == + Triple::ArchType::aarch64; bool VerboseAsm = OS.isVerboseAsm(); auto AddComment = [&](const Twine &Comment) { @@ -613,9 +633,11 @@ } AddComment("LabelStart"); - OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + OS.EmitValue(isAArch64 ? getLabel(BeginLabel) + : getLabelPlusOne(BeginLabel), 4); AddComment("LabelEnd"); - OS.EmitValue(getLabelPlusOne(EndLabel), 4); + OS.EmitValue(isAArch64 ? getLabel(EndLabel) + : getLabelPlusOne(EndLabel), 4); AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" : "CatchAll"); OS.EmitValue(FilterOrFinally, 4); @@ -799,7 +821,7 @@ // TypeDescriptor *Type; // int32_t CatchObjOffset; // void (*Handler)(); - // int32_t ParentFrameOffset; // x64 only + // int32_t ParentFrameOffset; // x64 and AArch64 only // }; OS.EmitLabel(HandlerMapXData); for (const WinEHHandlerType &HT : TBME.HandlerArray) { @@ -857,6 +879,9 @@ const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, SmallVectorImpl> &IPToStateTable) { + bool isAArch64 = Asm->MF->getTarget().getTargetTriple().getArch() == + Triple::ArchType::aarch64; + for (MachineFunction::const_iterator FuncletStart = MF->begin(), FuncletEnd = MF->begin(), End = MF->end(); @@ -901,7 +926,8 @@ ChangeLabel = StateChange.PreviousEndLabel; // Emit an entry indicating that PCs after 'Label' have this EH state. 
IPToStateTable.push_back( - std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + std::make_pair(isAArch64 ? getLabel(ChangeLabel) + : getLabelPlusOne(ChangeLabel), StateChange.NewState)); // FIXME: assert that NewState is between CatchLow and CatchHigh. } } Index: lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- lib/Target/AArch64/AArch64CallingConvention.td +++ lib/Target/AArch64/AArch64CallingConvention.td @@ -288,6 +288,14 @@ D8, D9, D10, D11, D12, D13, D14, D15)>; +// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. +// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, +// and not (LR,FP) pairs. +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -69,6 +69,17 @@ bool enableStackSlotScavenging(const MachineFunction &MF) const override; + void processFunctionBeforeFrameFinalized(MachineFunction &MF, + RegScavenger *RS) const override; + + unsigned getWinEHParentFrameOffset(const MachineFunction &MF) const override; + + unsigned getWinEHFuncletFrameSize(const MachineFunction &MF) const; + + int getFrameIndexReferencePreferSP(const MachineFunction &MF, int FI, + unsigned &FrameReg, + bool IgnoreSPUpdates) const override; + private: bool shouldCombineCSRLocalStackBump(MachineFunction &MF, unsigned StackBumpBytes) const; Index: lib/Target/AArch64/AArch64FrameLowering.cpp 
=================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -114,11 +114,13 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -153,6 +155,54 @@ /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()). static const unsigned DefaultSafeSPDisplacement = 255; +static bool isSEHOpcode(unsigned Opc) { + switch (Opc) { + default: + return false; + case AArch64::SEH_StackAlloc: + case AArch64::SEH_SaveFPLR: + case AArch64::SEH_SaveFPLR_X: + case AArch64::SEH_SaveReg: + case AArch64::SEH_SaveReg_X: + case AArch64::SEH_SaveRegP: + case AArch64::SEH_SaveRegP_X: + case AArch64::SEH_SaveFReg: + case AArch64::SEH_SaveFReg_X: + case AArch64::SEH_SaveFRegP: + case AArch64::SEH_SaveFRegP_X: + case AArch64::SEH_SetFP: + case AArch64::SEH_AddFP: + case AArch64::SEH_Nop: + case AArch64::SEH_PrologEnd: + case AArch64::SEH_EpilogStart: + case AArch64::SEH_EpilogEnd: + return true; + } +} + +// A small RAII class to handle insertion of SEH codes on multiple paths. 
+class InsertCFI { + MachineBasicBlock &MBB; + MachineBasicBlock::iterator LastMI; + const DebugLoc &DL; + const TargetInstrInfo &TII; + unsigned SEH_Opc; + MachineInstr::MIFlag Flag; + +public: + InsertCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator LastMI, + const DebugLoc &DL, const TargetInstrInfo &TII, unsigned SEH_Opc, + MachineInstr::MIFlag Flag) + : MBB(MBB), LastMI(LastMI), DL(DL), TII(TII), SEH_Opc(SEH_Opc), Flag(Flag) + { + assert(isSEHOpcode(SEH_Opc) && + "InsertCFI is only for inserting SEH opcodes"); + } + ~InsertCFI() { + BuildMI(MBB, LastMI, DL, TII.get(SEH_Opc)).setMIFlag(Flag); + } +}; + /// Look at each instruction that references stack frames and return the stack /// size limit beyond which some of these instructions will require a scratch /// register during their expansion later. @@ -181,6 +231,16 @@ return DefaultSafeSPDisplacement; } +static bool isFuncletReturnInstr(MachineInstr &MI) { + switch (MI.getOpcode()) { + case AArch64::CATCHRET: + case AArch64::CLEANUPRET: + return true; + default: + return false; + } +} + bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { if (!EnableRedZone) return false; @@ -201,6 +261,11 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + + // Win64 EH requires a frame pointer if funclets are present. + if (MF.hasEHFunclets()) + return true; + // Retain behavior of always omitting the FP for leaf functions when possible. 
if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF)) return true; @@ -378,6 +443,15 @@ !F.hasFnAttribute("no-stack-arg-probe"); } +static unsigned getRegNumX(unsigned Reg) { + if (Reg == AArch64::FP) + return 29; + else if (Reg == AArch64::LR) + return 30; + else + return Reg - AArch64::X0; +} + bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( MachineFunction &MF, unsigned StackBumpBytes) const { AArch64FunctionInfo *AFI = MF.getInfo(); @@ -408,12 +482,136 @@ return true; } +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. +static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI, + const TargetInstrInfo &TII, + MachineInstr::MIFlag Flag) { + unsigned Opc = MBBI->getOpcode(); + MachineBasicBlock *MBB = MBBI->getParent(); + MachineFunction &MF = *MBB->getParent(); + DebugLoc DL = MBBI->getDebugLoc(); + unsigned ImmIdx = MBBI->getNumOperands() - 1; + int Imm = MBBI->getOperand(ImmIdx).getImm(); + MachineInstrBuilder MIB; + + switch (Opc) { + default: + assert(false && "No SEH Opcode for this instruction"); + case AArch64::STPDpre: + case AArch64::LDPDpost: { + if (Opc == AArch64::LDPDpost) + Imm = -Imm; + unsigned Reg0 = MBBI->getOperand(1).getReg(); + unsigned Reg1 = MBBI->getOperand(2).getReg(); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X)) + .addImm(Reg0 - AArch64::D0) + .addImm(Reg1 - AArch64::D0) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::STPXpre: + case AArch64::LDPXpost: { + if (Opc == AArch64::LDPXpost) { + Imm = -Imm; + } + unsigned Reg0 = MBBI->getOperand(1).getReg(); + unsigned Reg1 = MBBI->getOperand(2).getReg(); + if (Reg0 == AArch64::FP && Reg1 == AArch64::LR) + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X)) + .addImm(Imm * 8) + .setMIFlag(Flag); + else { + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X)) + .addImm(Reg0 - AArch64::X0) + .addImm(Reg1 - AArch64::X0) + .addImm(Imm * 8) + 
.setMIFlag(Flag); + } + break; + } + case AArch64::STRDpre: + case AArch64::LDRDpost: { + if (Opc == AArch64::LDRDpost) + Imm = -Imm; + unsigned Reg = MBBI->getOperand(1).getReg() - AArch64::D0; + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + case AArch64::STRXpre: + case AArch64::LDRXpost: { + if (Opc == AArch64::LDRXpost) + Imm = -Imm; + unsigned Reg = getRegNumX(MBBI->getOperand(1).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + case AArch64::STPDi: + case AArch64::LDPDi: { + unsigned Reg0 = MBBI->getOperand(0).getReg(); + unsigned Reg1 = MBBI->getOperand(1).getReg(); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP)) + .addImm(Reg0 - AArch64::D0) + .addImm(Reg1 - AArch64::D0) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + case AArch64::STPXi: + case AArch64::LDPXi: { + unsigned Reg0 = MBBI->getOperand(0).getReg(); + unsigned Reg1 = MBBI->getOperand(1).getReg(); + if (MBBI->getOperand(0).getReg() == AArch64::FP && + MBBI->getOperand(1).getReg() == AArch64::LR) + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR)) + .addImm(Imm * 8) + .setMIFlag(Flag); + else + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP)) + .addImm(Reg0 - AArch64::X0) + .addImm(Reg1 - AArch64::X0) + .addImm(Imm * 8) + .setMIFlag(Flag); + break; + } + // scaled + case AArch64::STRXui: + case AArch64::LDRXui: { + unsigned Reg = getRegNumX(MBBI->getOperand(0).getReg()); + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + case AArch64::STRDui: + case AArch64::LDRDui: { + unsigned Reg = MBBI->getOperand(0).getReg() - AArch64::D0; + MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg)) + .addImm(Reg) + .addImm(Imm) + .setMIFlag(Flag); + break; + } + } + auto I = MBB->insertAfter(MBBI, MIB); + return I; +} + // Convert callee-save register save/restore 
instruction to do stack pointer // decrement/increment to allocate/deallocate the callee-save stack area by // converting store/load to use pre/post increment version. static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { + const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, + bool NeedsWinCFI = false, bool InProlog = true) { // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions. while (MBBI->getOpcode() == AArch64::STRXpost || @@ -457,6 +655,13 @@ break; } + // Get rid of the SEH code associated with the old instruction. + if (NeedsWinCFI) { + auto SEH = std::next(MBBI); + if (isSEHOpcode(SEH->getOpcode())) + SEH->eraseFromParent(); + } + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); MIB.addReg(AArch64::SP, RegState::Define); @@ -481,15 +686,24 @@ MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end()); + // Generate a new SEH code that corresponds to the new instruction. + if (NeedsWinCFI) + InsertSEH(*MIB, *TII, + InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy); + return std::prev(MBB.erase(MBBI)); } // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, - unsigned LocalStackSize) { + unsigned LocalStackSize, + bool NeedsWinCFI) { unsigned Opc = MI.getOpcode(); + if (isSEHOpcode(Opc)) + return; + // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions. if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) { @@ -512,6 +726,33 @@ // All generated opcodes have scaled offsets. 
assert(LocalStackSize % 8 == 0); OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); + + // Fix up the SEH opcode associated with the save/restore instruction. + if (NeedsWinCFI) { + auto SEH_MI = std::next(MachineBasicBlock::iterator(MI)); + auto &MBB = *(MI.getParent()); + if (SEH_MI != MBB.end()) { + unsigned SEHOpc = SEH_MI->getOpcode(); + if (!isSEHOpcode(SEHOpc)) + return; + MachineOperand *ImmOpnd = nullptr; + unsigned ImmIdx = SEH_MI->getNumOperands()-1; + switch (SEHOpc) { + default: + assert(false && "Fix the offset in the SEH instruction"); + break; + case AArch64::SEH_SaveFPLR: + case AArch64::SEH_SaveRegP: + case AArch64::SEH_SaveReg: + case AArch64::SEH_SaveFRegP: + case AArch64::SEH_SaveFReg: + ImmOpnd = &SEH_MI->getOperand(ImmIdx); + break; + } + if (ImmOpnd) + ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); + } + } } static void adaptForLdStOpt(MachineBasicBlock &MBB, @@ -558,6 +799,12 @@ AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); bool HasFP = hasFP(MF); + bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool NeedsWinCFI = IsWin64Prologue && F.needsUnwindTableEntry(); + MF.setHasWinCFI(NeedsWinCFI); + bool IsFunclet = MBB.isEHFuncletEntry(); + assert((!IsFunclet || IsWin64Prologue) && + "Funclets should only be present on Windows"); // At this point, we're going to decide whether or not the function uses a // redzone. In most cases, the function doesn't have a redzone so let's @@ -568,15 +815,28 @@ // to determine the end of the prologue. DebugLoc DL; + // The very last FrameSetup instruction indicates the end of prologue. Emit + // a SEH opcode indicating the prologue end. + if (NeedsWinCFI) { + MachineBasicBlock::iterator LastFrameSetupI = MBB.begin(); + while (LastFrameSetupI != MBB.end() && + LastFrameSetupI->getFlag(MachineInstr::FrameSetup)) + ++LastFrameSetupI; + DebugLoc DL_CFI = + (MBB.end() == LastFrameSetupI) ? 
DL : LastFrameSetupI->getDebugLoc(); + InsertCFI InsertCFI(MBB, LastFrameSetupI, DL_CFI, *TII, + AArch64::SEH_PrologEnd, MachineInstr::FrameSetup); + } + // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) return; + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : (int)MFI.getStackSize(); - int NumBytes = (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); - // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); @@ -589,7 +849,7 @@ ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -605,20 +865,21 @@ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = (IsWin64 && !IsFunclet) ? + alignTo(AFI->getVarArgsGPRSize(), 16) : 0; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. 
AFI->setLocalStackSize(NumBytes - PrologueSaveSize); - bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); NumBytes = 0; - } else if (PrologueSaveSize != 0) { + } else if (AFI->getCalleeSavedStackSize() != 0) { MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, - -PrologueSaveSize); + -PrologueSaveSize, + NeedsWinCFI); NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -629,9 +890,17 @@ MachineBasicBlock::iterator End = MBB.end(); while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) { if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); + fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), + NeedsWinCFI); ++MBBI; } + + // The code below is not applicable to funclets. We have emitted all the + // SEH opcodes that we needed to emit. The FP and BP belong to the + // containing function. + if (IsFunclet) + return; + if (HasFP) { // Only set up FP if we actually need to. Frame pointer is fp = // sp - fixedobject - 16. @@ -644,7 +913,7 @@ // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -701,7 +970,7 @@ // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. 
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); if (NeedsRealignment) { const unsigned Alignment = MFI.getMaxAlignment(); @@ -724,6 +993,10 @@ .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); AFI->setStackRealigned(true); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). + addImm(NumBytes & andMaskEncoded). + setMIFlag(MachineInstr::FrameSetup); } } @@ -737,6 +1010,9 @@ if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)). + setMIFlag(MachineInstr::FrameSetup); } if (needsFrameMoves) { @@ -840,14 +1116,24 @@ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); + bool IsFunclet = isFuncletReturnInstr(*MBBI); + if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } - int NumBytes = MFI.getStackSize(); - const AArch64FunctionInfo *AFI = MF.getInfo(); + + if (NeedsWinCFI) + InsertCFI InsertCFI(MBB, MBBI, DL, *TII, AArch64::SEH_EpilogEnd, + MachineInstr::FrameDestroy); + + int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF) + : (int)MFI.getStackSize(); + AArch64FunctionInfo *AFI = MF.getInfo(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -901,22 +1187,28 @@ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + // Funclets don't contain any fixed objects (varargs, etc). + unsigned FixedObject = (IsWin64 && !IsFunclet) ? 
+ alignTo(AFI->getVarArgsGPRSize(), 16) : 0; uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; + AFI->setLocalStackSize(NumBytes - PrologueSaveSize); bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); // Assume we can't combine the last pop with the sp restore. - if (!CombineSPBump && PrologueSaveSize != 0) { + if (!CombineSPBump && AFI->getCalleeSavedStackSize() != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); + while (isSEHOpcode(Pop->getOpcode())) + Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); // If the offset is 0, convert it to a post-index ldp. if (OffsetOp.getImm() == 0) { convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, - PrologueSaveSize); + PrologueSaveSize, NeedsWinCFI, + false); } else { // If not, make sure to emit an add after the last ldp. // We're doing this by transfering the size to be restored from the @@ -936,15 +1228,20 @@ if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) { ++LastPopI; break; - } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); + } else if (CombineSPBump && !isSEHOpcode(LastPopI->getOpcode())) + fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI); } + if (NeedsWinCFI) + BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)). + setMIFlag(MachineInstr::FrameDestroy); + // If there is a single SP update, insert it before the ret and we're done. 
if (CombineSPBump) { emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, NumBytes + AfterCSRPopSize, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); return; } @@ -972,7 +1269,8 @@ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy); + StackRestoreBytes, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI); if (Done) return; @@ -986,10 +1284,10 @@ if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -AFI->getCalleeSavedStackSize() + 16, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // assumes the SP is at the same location as it was after the callee-save save @@ -1010,7 +1308,8 @@ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - AfterCSRPopSize, TII, MachineInstr::FrameDestroy); + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI); } } @@ -1034,6 +1333,7 @@ const AArch64Subtarget &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); + bool HasWin64EHFunclets = IsWin64 && MF.hasEHFunclets(); unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16; int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize(); @@ -1084,6 +1384,8 @@ // being in range for direct access. If the FPOffset is positive, // that'll always be best, as the SP will be even further away. 
UseFP = true; + } else if (HasWin64EHFunclets) { + UseFP = true; } else { // We have the choice between FP and (SP or BP). if (FPOffsetFits && PreferFP) // If FP is the best fit, use it. @@ -1160,6 +1462,8 @@ if (CSI.empty()) return; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); AArch64FunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); @@ -1173,6 +1477,11 @@ "Odd number of callee-saved regs to spill!"); int Offset = AFI->getCalleeSavedStackSize(); + // On Win64, we don't necessarily pair all registers or all registers except + // possibly one because the unwind opcodes for stack unwinding require that + // only consecutive registers are paired. This flag makes sure that the + // padding below is done only once. + bool FixupDone = false; for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); @@ -1181,11 +1490,16 @@ AArch64::FPR64RegClass.contains(RPI.Reg1)); RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); - // Add the next reg to the pair if it is in the same register class. + // If we are generating register pairs for a Windows function that requires + // EH support, then pair consecutive registers only. There are no unwind + // opcodes for saves/restores of non-consecutive register pairs. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x. 
+ // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); - if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || - (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) + if (((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || + (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) && + (!NeedsWinCFI || (NextReg == RPI.Reg1 + 1))) RPI.Reg2 = NextReg; } @@ -1219,9 +1533,11 @@ RPI.FrameIdx = CSI[i].getFrameIdx(); - if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { + if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired() && + !FixupDone) { // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. + FixupDone = NeedsWinCFI; Offset -= 16; assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16); MFI.setObjectAlignment(RPI.FrameIdx, 16); @@ -1245,6 +1561,8 @@ const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); DebugLoc DL; SmallVector RegPairs; @@ -1293,9 +1611,21 @@ if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); + assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && + "Windows unwdinding requires a consecutive (FP,LR) pair"); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (!MRI.isReserved(Reg1)) MBB.addLiveIn(Reg1); + + // Windows unwinding codes require that gprs be consecutive if they are paired. 
+ if (NeedsWinCFI) { + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOStore, 8, 8)); + } + if (RPI.isPaired()) { if (!MRI.isReserved(Reg2)) MBB.addLiveIn(Reg2); @@ -1304,13 +1634,21 @@ MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), MachineMemOperand::MOStore, 8, 8)); } - MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit - .setMIFlag(MachineInstr::FrameSetup); - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOStore, 8, 8)); + + if (NeedsWinCFI) { + MIB.addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit + .setMIFlag(MachineInstr::FrameSetup); + InsertSEH(MIB, TII, MachineInstr::FrameSetup); + } else { + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) + .addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit + .setMIFlag(MachineInstr::FrameSetup); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOStore, 8, 8)); + } } return true; } @@ -1323,6 +1661,8 @@ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector RegPairs; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -1355,19 +1695,36 @@ dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); + + if (NeedsWinCFI) { + MIB.addReg(Reg1, getDefRegState(true)); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOLoad, 8, 8)); + } + if (RPI.isPaired()) { MIB.addReg(Reg2, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( 
MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), MachineMemOperand::MOLoad, 8, 8)); } - MIB.addReg(Reg1, getDefRegState(true)) - .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit - .setMIFlag(MachineInstr::FrameDestroy); - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOLoad, 8, 8)); + + + if (NeedsWinCFI) { + MIB.addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit + .setMIFlag(MachineInstr::FrameDestroy); + InsertSEH(MIB, TII, MachineInstr::FrameDestroy); + } else { + MIB.addReg(Reg1, getDefRegState(true)) + .addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit + .setMIFlag(MachineInstr::FrameDestroy); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOLoad, 8, 8)); + } }; if (ReverseCSRRestoreSeq) @@ -1524,3 +1881,70 @@ const AArch64FunctionInfo *AFI = MF.getInfo(); return AFI->hasCalleeSaveStackFreeSpace(); } + +void AArch64FrameLowering::processFunctionBeforeFrameFinalized( + MachineFunction &MF, RegScavenger *RS) const { + // Mark the function as not having WinCFI. We will set it back to true in + // emitPrologue if it gets called and emits CFI. + MF.setHasWinCFI(false); + + // If this function isn't doing Win64-style C++ EH, we don't need to do + // anything. + if (!MF.hasEHFunclets()) + return; + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); + + // Create an UnwindHelp object. 
+ int UnwindHelpFI = + MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + + MachineBasicBlock &MBB = MF.front(); + auto MBBI = MBB.begin(); + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + if (MBBI->isTerminator()) + return; + // We need to store -2 into the UnwindHelp object at the start of the + // function. + DebugLoc DL = MBB.findDebugLoc(MBBI); + RS->enterBasicBlock(MBB); + unsigned DstReg = RS->scavengeRegister(&AArch64::GPR64RegClass, MBBI, 0); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2); + BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi)) + .addReg(DstReg, getKillRegState(true)) + .addFrameIndex(UnwindHelpFI) + .addImm(0); +} + +// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before +// the update. This is easily retrieved as it is exactly the offset that is set +// in processFunctionBeforeFrameFinalized. +int AArch64FrameLowering::getFrameIndexReferencePreferSP( + const MachineFunction &MF, int FI, unsigned &FrameReg, + bool IgnoreSPUpdates) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + LLVM_DEBUG(dbgs() << "Offset from SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); +} + +// Patch in zero for now. Haven't encountered any problems yet. +unsigned AArch64FrameLowering::getWinEHParentFrameOffset( + const MachineFunction &MF) const { + return 0; +} + +unsigned AArch64FrameLowering::getWinEHFuncletFrameSize( + const MachineFunction &MF) const { + // This is the size of the pushed CSRs. + unsigned CSSize = + MF.getInfo()->getCalleeSavedStackSize(); + // This is the amount of stack a funclet needs to allocate. 
+ return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(), + getStackAlignment()); +} Index: lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.h +++ lib/Target/AArch64/AArch64ISelLowering.h @@ -301,6 +301,12 @@ MachineBasicBlock *EmitF128CSEL(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, + MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI, + MachineBasicBlock *BB) const; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override; @@ -514,6 +520,8 @@ bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override; + bool needsFixedCatchObjects() const override; + private: /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. 
Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1272,6 +1272,22 @@ return EndBB; } +MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( + MachineInstr &MI, MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + + assert(!isAsynchronousEHPersonality( + classifyEHPersonality(MF->getFunction().getPersonalityFn())) && + "SEH does not use catchret!"); + return BB; +} + +MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad( + MachineInstr &MI, MachineBasicBlock *BB) const { + MI.eraseFromParent(); + return BB; +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { @@ -1287,6 +1303,11 @@ case TargetOpcode::STACKMAP: case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); + + case AArch64::CATCHRET: + return EmitLoweredCatchRet(MI, BB); + case AArch64::CATCHPAD: + return EmitLoweredCatchPad(MI, BB); } } @@ -11540,3 +11561,8 @@ MF.getFrameInfo().computeMaxCallFrameSize(MF); TargetLoweringBase::finalizeLowering(MF); } + +// Unlike X86, we let frame lowering assign offset to all the catch objects. +bool AArch64TargetLowering::needsFixedCatchObjects() const { + return false; +} Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -286,7 +286,7 @@ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, - bool SetNZCV = false); + bool SetNZCV = false, bool NeedsWinCFI = false); /// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the /// FP. 
Return false if the offset could not be handled directly in MI, and Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -66,7 +66,8 @@ cl::desc("Restrict range of Bcc instructions (DEBUG)")); AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) - : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), + : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP, + AArch64::CATCHRET), RI(STI.getTargetTriple()), Subtarget(STI) {} /// GetInstSize - Return the number of bytes of code the specified @@ -1593,11 +1594,35 @@ } bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { - if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD) + if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD && + MI.getOpcode() != AArch64::CATCHRET) return false; MachineBasicBlock &MBB = *MI.getParent(); DebugLoc DL = MI.getDebugLoc(); + + if (MI.getOpcode() == AArch64::CATCHRET) { + // Skip past epilogue + const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo(); + MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); + auto MBBI = MachineBasicBlock::iterator(MI); + MachineBasicBlock::iterator SEH_MBBI = std::prev(MBBI); + while (SEH_MBBI->getFlag(MachineInstr::FrameDestroy) && + SEH_MBBI != MBB.begin()) + SEH_MBBI = std::prev(SEH_MBBI); + if (SEH_MBBI != MBB.begin()) + SEH_MBBI = std::next(SEH_MBBI); + BuildMI(MBB, SEH_MBBI, DL, TII->get(AArch64::ADRP)) + .addReg(AArch64::X0) + .addMBB(TargetMBB); + BuildMI(MBB, SEH_MBBI, DL, TII->get(AArch64::ADDXri)) + .addReg(AArch64::X0) + .addReg(AArch64::X0) + .addMBB(TargetMBB) + .addImm(0); + return true; + } + unsigned Reg = MI.getOperand(0).getReg(); const GlobalValue *GV = cast((*MI.memoperands_begin())->getValue()); @@ -2949,7 +2974,8 @@ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, 
unsigned SrcReg, int Offset, const TargetInstrInfo *TII, - MachineInstr::MIFlag Flag, bool SetNZCV) { + MachineInstr::MIFlag Flag, bool SetNZCV, + bool NeedsWinCFI) { if (DestReg == SrcReg && Offset == 0) return; @@ -3004,6 +3030,21 @@ .addImm(Offset) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .setMIFlag(Flag); + + if (NeedsWinCFI) { + if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || + (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { + if (Offset == 0) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). + setMIFlag(Flag); + else + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). + addImm(Offset).setMIFlag(Flag); + } else if (DestReg == AArch64::SP) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). + addImm(Offset).setMIFlag(Flag); + } + } } MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( Index: lib/Target/AArch64/AArch64InstrInfo.td =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.td +++ lib/Target/AArch64/AArch64InstrInfo.td @@ -2984,6 +2984,21 @@ } //===----------------------------------------------------------------------===// +// Pseudo instructions for Windows EH +//===----------------------------------------------------------------------===// +let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, + isCodeGenOnly = 1, isReturn = 1, isPseudo = 1 in { + def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>; + let usesCustomInserter = 1 in + def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>, + Sched<[]>; +} + +let hasSideEffects = 1, hasCtrlDep = 1, isCodeGenOnly = 1, + usesCustomInserter = 1 in +def CATCHPAD : Pseudo<(outs), (ins), [(catchpad)]>, Sched<[]>; + +//===----------------------------------------------------------------------===// // Instructions used for emitting unwind opcodes on ARM64 Windows. 
//===----------------------------------------------------------------------===// let isPseudo = 1 in { Index: lib/Target/AArch64/AArch64MCInstLower.cpp =================================================================== --- lib/Target/AArch64/AArch64MCInstLower.cpp +++ lib/Target/AArch64/AArch64MCInstLower.cpp @@ -253,4 +253,17 @@ if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } + + switch (OutMI.getOpcode()) { + case AArch64::CATCHRET: + OutMI = MCInst(); + OutMI.setOpcode(AArch64::RET); + OutMI.addOperand(MCOperand::createReg(AArch64::LR)); + break; + case AArch64::CLEANUPRET: + OutMI = MCInst(); + OutMI.setOpcode(AArch64::RET); + OutMI.addOperand(MCOperand::createReg(AArch64::LR)); + break; + } } Index: lib/Target/AArch64/AArch64MachineFunctionInfo.h =================================================================== --- lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -79,6 +79,11 @@ /// registers. unsigned VarArgsFPRSize = 0; + /// \brief Size of fixed objects on the stack that are not part of varargs. + /// This is needed for Windows AArch64 exception handling, as fixed objects + /// need to be created on the stack. + unsigned FixedObjsSize = 0; + /// True if this function has a subset of CSRs that is handled explicitly via /// copies. 
bool IsSplitCSR = false; @@ -156,6 +161,9 @@ unsigned getVarArgsGPRSize() const { return VarArgsGPRSize; } void setVarArgsGPRSize(unsigned Size) { VarArgsGPRSize = Size; } + unsigned getNonVarArgsFixedObjsSize() const { return FixedObjsSize; } + void setNonVarArgsFixedObjsSize(unsigned Size) { FixedObjsSize = Size; } + int getVarArgsFPRIndex() const { return VarArgsFPRIndex; } void setVarArgsFPRIndex(int Index) { VarArgsFPRIndex = Index; } Index: lib/Target/AArch64/AArch64RegisterInfo.h =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.h +++ lib/Target/AArch64/AArch64RegisterInfo.h @@ -54,6 +54,9 @@ // normal calls, so they need a different mask to represent this. const uint32_t *getTLSCallPreservedMask() const; + // Funclets on ARM64 Windows don't preserve any registers. + const uint32_t *getNoPreservedMask() const override; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the /// case that 'returned' is on an i64 first argument if the calling convention /// is one that can (partially) model this attribute with a preserved mask Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -42,6 +42,8 @@ const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); + if (MF->getSubtarget().isTargetWindows()) + return CSR_Win_AArch64_AAPCS_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::GHC) // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around @@ -117,6 +119,10 @@ return CSR_AArch64_TLS_ELF_RegMask; } +const uint32_t *AArch64RegisterInfo::getNoPreservedMask() const { + return CSR_AArch64_NoRegs_RegMask; +} + const uint32_t * AArch64RegisterInfo::getThisReturnPreservedMask(const 
MachineFunction &MF, CallingConv::ID CC) const { Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -309,6 +309,7 @@ bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); } bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetWin64() const { return TargetTriple.isOSWindows(); } bool useAA() const override { return UseAA; } @@ -346,6 +347,7 @@ bool isCallingConvWin64(CallingConv::ID CC) const { switch (CC) { case CallingConv::C: + case CallingConv::Fast: return isTargetWindows(); case CallingConv::Win64: return true; Index: lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp =================================================================== --- lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -115,6 +115,7 @@ CommentString = ";"; ExceptionsType = ExceptionHandling::WinEH; + WinEHEncodingType = WinEH::EncodingType::Itanium; } AArch64MCAsmInfoGNUCOFF::AArch64MCAsmInfoGNUCOFF() { Index: test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -104,7 +104,7 @@ ; CHECK-LABEL: fp ; CHECK: str x21, [sp, #-96]! -; CHECK: stp x20, x19, [sp, #16] +; CHECK: stp x19, x20, [sp, #16] ; CHECK: stp x29, x30, [sp, #32] ; CHECK: add x29, sp, #32 ; CHECK: add x8, x29, #24 @@ -125,7 +125,7 @@ ; CHECK: mov x4, xzr ; CHECK: bl __stdio_common_vsprintf ; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x20, x19, [sp, #16] +; CHECK: ldp x19, x20, [sp, #16] ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge ; CHECK: ldr x21, [sp], #96 @@ -151,8 +151,8 @@ ; CHECK-LABEL: vla ; CHECK: str x23, [sp, #-112]! 
-; CHECK: stp x22, x21, [sp, #16] -; CHECK: stp x20, x19, [sp, #32] +; CHECK: stp x21, x22, [sp, #16] +; CHECK: stp x19, x20, [sp, #32] ; CHECK: stp x29, x30, [sp, #48] ; CHECK: add x29, sp, #48 ; CHECK: add x8, x29, #16 @@ -183,8 +183,8 @@ ; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] -; CHECK: ldp x20, x19, [sp, #32] -; CHECK: ldp x22, x21, [sp, #16] +; CHECK: ldp x19, x20, [sp, #32] +; CHECK: ldp x21, x22, [sp, #16] ; CHECK: ldr x23, [sp], #112 ; CHECK: ret define void @vla(i32, i8*, ...) local_unnamed_addr { @@ -212,8 +212,9 @@ ; CHECK-LABEL: snprintf ; CHECK: sub sp, sp, #96 -; CHECK: stp x21, x20, [sp, #16] -; CHECK: stp x19, x30, [sp, #32] +; CHECK: str x21, [sp, #16] +; CHECK: stp x19, x20, [sp, #24] +; CHECK: str x30, [sp, #40] ; CHECK: add x8, sp, #56 ; CHECK: mov x19, x2 ; CHECK: mov x20, x1 @@ -231,8 +232,9 @@ ; CHECK: mov x3, x19 ; CHECK: mov x4, xzr ; CHECK: bl __stdio_common_vsprintf -; CHECK: ldp x19, x30, [sp, #32] -; CHECK: ldp x21, x20, [sp, #16] +; CHECK: ldr x30, [sp, #40] +; CHECK: ldp x19, x20, [sp, #24] +; CHECK: ldr x21, [sp, #16] ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge ; CHECK: add sp, sp, #96