Index: lib/Target/AArch64/AArch64CallingConvention.td =================================================================== --- lib/Target/AArch64/AArch64CallingConvention.td +++ lib/Target/AArch64/AArch64CallingConvention.td @@ -288,6 +288,14 @@ D8, D9, D10, D11, D12, D13, D14, D15)>; +// Win64 has unwinding codes for an (FP,LR) pair, save_fplr and save_fplr_x. +// We put FP before LR, so that frame lowering logic generates (FP,LR) pairs, +// and not (LR,FP) pairs. +def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add FP, LR, X19, X20, X21, X22, + X23, X24, X25, X26, X27, X28, + D8, D9, D10, D11, + D12, D13, D14, D15)>; + // Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since // 'this' and the pointer return value are both passed in X0 in these cases, // this can be partially modelled by treating X0 as a callee-saved register; Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -115,11 +115,13 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCDwarf.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -434,12 +436,154 @@ return true; } +// Given a load or a store instruction, generate an appropriate unwinding SEH +// code on Windows. 
+//
+// MBBI must be one of the SP-relative callee-save loads/stores handled in the
+// switch below; its last operand is read as the immediate offset. The SEH
+// pseudo instruction is tagged with Flag (FrameSetup or FrameDestroy) and is
+// inserted directly after MBBI. The result of that insertion is returned. If
+// the opcode is not recognised, nothing is inserted and MBBI is returned.
+static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
+                                             const TargetInstrInfo &TII,
+                                             MachineInstr::MIFlag Flag) {
+  unsigned Opc = MBBI->getOpcode();
+  MachineBasicBlock *MBB = MBBI->getParent();
+  MachineFunction &MF = *MBB->getParent();
+  DebugLoc DL = MBBI->getDebugLoc();
+  unsigned ImmIdx = MBBI->getNumOperands() - 1;
+  int Imm = MBBI->getOperand(ImmIdx).getImm();
+  MachineInstrBuilder MIB;
+
+  // Unwind codes identify a GPR by its architectural number. FP (x29) and LR
+  // (x30) are separately named registers, so do not derive their number from
+  // the distance to X0; map them explicitly.
+  auto GPRNum = [](unsigned Reg) -> unsigned {
+    if (Reg == AArch64::FP)
+      return 29;
+    if (Reg == AArch64::LR)
+      return 30;
+    return Reg - AArch64::X0;
+  };
+  auto FPRNum = [](unsigned Reg) -> unsigned { return Reg - AArch64::D0; };
+
+  // Offsets of the paired and the unsigned-offset forms are encoded in units
+  // of 8 bytes and are scaled below. The single-register pre/post-indexed
+  // forms carry a byte offset already and must not be scaled again.
+  // For the post-indexed loads the immediate is negated so that the epilogue
+  // code mirrors the pre-decrement recorded for the matching prologue store.
+  switch (Opc) {
+  default:
+    assert(false && "No SEH Opcode for this instruction");
+    // Without assertions, emit nothing instead of falling into the first case
+    // and describing an instruction that was never generated.
+    return MBBI;
+  case AArch64::LDPDpost:
+    Imm = -Imm;
+    // Intentional fallthrough.
+  case AArch64::STPDpre: {
+    // Operand 0 is the written-back base register; the data registers follow.
+    unsigned Reg0 = FPRNum(MBBI->getOperand(1).getReg());
+    unsigned Reg1 = FPRNum(MBBI->getOperand(2).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
+              .addImm(Reg0)
+              .addImm(Reg1)
+              .addImm(Imm * 8)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::LDPXpost:
+    Imm = -Imm;
+    // Intentional fallthrough.
+  case AArch64::STPXpre: {
+    unsigned Reg0 = MBBI->getOperand(1).getReg();
+    unsigned Reg1 = MBBI->getOperand(2).getReg();
+    // The (FP,LR) pair has a dedicated unwind code.
+    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
+                .addImm(Imm * 8)
+                .setMIFlag(Flag);
+    else
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
+                .addImm(GPRNum(Reg0))
+                .addImm(GPRNum(Reg1))
+                .addImm(Imm * 8)
+                .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::LDRDpost:
+    Imm = -Imm;
+    // Intentional fallthrough.
+  case AArch64::STRDpre: {
+    unsigned Reg = FPRNum(MBBI->getOperand(1).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
+              .addImm(Reg)
+              .addImm(Imm) // Byte offset, not scaled.
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::LDRXpost:
+    Imm = -Imm;
+    // Intentional fallthrough.
+  case AArch64::STRXpre: {
+    unsigned Reg = GPRNum(MBBI->getOperand(1).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
+              .addImm(Reg)
+              .addImm(Imm) // Byte offset, not scaled.
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STPDi:
+  case AArch64::LDPDi: {
+    unsigned Reg0 = FPRNum(MBBI->getOperand(0).getReg());
+    unsigned Reg1 = FPRNum(MBBI->getOperand(1).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
+              .addImm(Reg0)
+              .addImm(Reg1)
+              .addImm(Imm * 8)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STPXi:
+  case AArch64::LDPXi: {
+    unsigned Reg0 = MBBI->getOperand(0).getReg();
+    unsigned Reg1 = MBBI->getOperand(1).getReg();
+    // The (FP,LR) pair has a dedicated unwind code.
+    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
+                .addImm(Imm * 8)
+                .setMIFlag(Flag);
+    else
+      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
+                .addImm(GPRNum(Reg0))
+                .addImm(GPRNum(Reg1))
+                .addImm(Imm * 8)
+                .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRXui:
+  case AArch64::LDRXui: {
+    unsigned Reg = GPRNum(MBBI->getOperand(0).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
+              .addImm(Reg)
+              .addImm(Imm * 8)
+              .setMIFlag(Flag);
+    break;
+  }
+  case AArch64::STRDui:
+  case AArch64::LDRDui: {
+    unsigned Reg = FPRNum(MBBI->getOperand(0).getReg());
+    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
+              .addImm(Reg)
+              .addImm(Imm * 8)
+              .setMIFlag(Flag);
+    break;
+  }
+  }
+  // MIB was built detached from any block; place it right behind the
+  // load/store it describes.
+  return MBB->insertAfter(MBBI, MIB);
+}
+
+// Fix up the SEH opcode associated with the save/restore instruction.
+static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI, + unsigned LocalStackSize) { + MachineOperand *ImmOpnd = nullptr; + unsigned ImmIdx = MBBI->getNumOperands() - 1; + switch (MBBI->getOpcode()) { + default: + assert(false && "Fix the offset in the SEH instruction"); + break; + case AArch64::SEH_SaveFPLR: + case AArch64::SEH_SaveRegP: + case AArch64::SEH_SaveReg: + case AArch64::SEH_SaveFRegP: + case AArch64::SEH_SaveFReg: + ImmOpnd = &MBBI->getOperand(ImmIdx); + break; + } + if (ImmOpnd) + ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize); +} + // Convert callee-save register save/restore instruction to do stack pointer // decrement/increment to allocate/deallocate the callee-save stack area by // converting store/load to use pre/post increment version. static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { + const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc, + bool NeedsWinCFI, bool InProlog = true) { // Ignore instructions that do not operate on SP, i.e. shadow call stack // instructions. while (MBBI->getOpcode() == AArch64::STRXpost || @@ -483,6 +627,13 @@ break; } + // Get rid of the SEH code associated with the old instruction. + if (NeedsWinCFI) { + auto SEH = std::next(MBBI); + if (AArch64InstrInfo::isSEHInstruction(*SEH)) + SEH->eraseFromParent(); + } + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc)); MIB.addReg(AArch64::SP, RegState::Define); @@ -507,13 +658,22 @@ MIB.setMIFlags(MBBI->getFlags()); MIB.setMemRefs(MBBI->memoperands()); + // Generate a new SEH code that corresponds to the new instruction. + if (NeedsWinCFI) + InsertSEH(*MIB, *TII, + InProlog ? 
MachineInstr::FrameSetup : MachineInstr::FrameDestroy); + return std::prev(MBB.erase(MBBI)); } // Fixup callee-save register save/restore instructions to take into account // combined SP bump by adding the local stack size to the stack offsets. static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, - unsigned LocalStackSize) { + unsigned LocalStackSize, + bool NeedsWinCFI) { + if (AArch64InstrInfo::isSEHInstruction(MI)) + return; + unsigned Opc = MI.getOpcode(); // Ignore instructions that do not operate on SP, i.e. shadow call stack @@ -538,6 +698,14 @@ // All generated opcodes have scaled offsets. assert(LocalStackSize % 8 == 0); OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8); + + if (NeedsWinCFI) { + auto MBBI = std::next(MachineBasicBlock::iterator(MI)); + assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction"); + assert(AArch64InstrInfo::isSEHInstruction(*MBBI) && + "Expecting a SEH instruction"); + fixupSEHOpcode(MBBI, LocalStackSize); + } } static void adaptForLdStOpt(MachineBasicBlock &MBB, @@ -584,6 +752,9 @@ AArch64FunctionInfo *AFI = MF.getInfo(); bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry(); bool HasFP = hasFP(MF); + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + F.needsUnwindTableEntry(); + MF.setHasWinCFI(NeedsWinCFI); // At this point, we're going to decide whether or not the function uses a // redzone. In most cases, the function doesn't have a redzone so let's @@ -599,6 +770,19 @@ .setMIFlag(MachineInstr::FrameSetup); } + // The very last FrameSetup instruction indicates the end of prologue. Emit a + // SEH opcode indicating the prologue end. + if (NeedsWinCFI) { + MachineBasicBlock::iterator LastFrameSetupI = MBB.begin(); + while (LastFrameSetupI != MBB.end() && + LastFrameSetupI->getFlag(MachineInstr::FrameSetup)) + ++LastFrameSetupI; + DebugLoc NewDL = + (MBB.end() == LastFrameSetupI) ? 
DL : LastFrameSetupI->getDebugLoc(); + BuildMI(MBB, LastFrameSetupI, DL, TII->get(AArch64::SEH_PrologEnd)) + .setMIFlag(MachineInstr::FrameSetup); + } + // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. if (MF.getFunction().getCallingConv() == CallingConv::GHC) @@ -607,7 +791,6 @@ int NumBytes = (int)MFI.getStackSize(); if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) { assert(!HasFP && "unexpected function without stack frame but with FP"); - // All of the stack allocation is for locals. AFI->setLocalStackSize(NumBytes); @@ -620,7 +803,7 @@ ++NumRedZoneFunctions; } else { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); // Label used to tie together the PROLOG_LABEL and the MachineMoves. MCSymbol *FrameLabel = MMI.getContext().createTempSymbol(); @@ -641,15 +824,14 @@ auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. 
AFI->setLocalStackSize(NumBytes - PrologueSaveSize); - bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes); if (CombineSPBump) { emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); NumBytes = 0; } else if (PrologueSaveSize != 0) { - MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII, - -PrologueSaveSize); + MBBI = convertCalleeSaveRestoreToSPPrePostIncDec( + MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI); NumBytes -= PrologueSaveSize; } assert(NumBytes >= 0 && "Negative stack allocation size!?"); @@ -660,9 +842,11 @@ MachineBasicBlock::iterator End = MBB.end(); while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) { if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize()); + fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), + NeedsWinCFI); ++MBBI; } + if (HasFP) { // Only set up FP if we actually need to. Frame pointer is fp = // sp - fixedobject - 16. @@ -675,7 +859,7 @@ // Note: All stores of callee-saved registers are marked as "FrameSetup". // This code marks the instruction(s) that set the FP also. emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); } if (windowsRequiresStackProbe(MF, NumBytes)) { @@ -733,7 +917,7 @@ // the correct value here, as NumBytes also includes padding bytes, // which shouldn't be counted here. 
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII, - MachineInstr::FrameSetup); + MachineInstr::FrameSetup, false, NeedsWinCFI); if (NeedsRealignment) { const unsigned Alignment = MFI.getMaxAlignment(); @@ -756,6 +940,10 @@ .addReg(scratchSPReg, RegState::Kill) .addImm(andMaskEncoded); AFI->setStackRealigned(true); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)) + .addImm(NumBytes & andMaskEncoded) + .setMIFlag(MachineInstr::FrameSetup); } } @@ -769,6 +957,9 @@ if (RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop)) + .setMIFlag(MachineInstr::FrameSetup); } if (needsFrameMoves) { @@ -898,14 +1089,22 @@ const TargetInstrInfo *TII = Subtarget.getInstrInfo(); DebugLoc DL; bool IsTailCallReturn = false; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); + if (MBB.end() != MBBI) { DL = MBBI->getDebugLoc(); unsigned RetOpcode = MBBI->getOpcode(); IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri; } + + if (NeedsWinCFI) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_EpilogEnd)) + .setMIFlag(MachineInstr::FrameDestroy); + int NumBytes = MFI.getStackSize(); - const AArch64FunctionInfo *AFI = MF.getInfo(); + AArch64FunctionInfo *AFI = MF.getInfo(); // All calls are tail calls in GHC calling conv, and functions have no // prologue/epilogue. @@ -970,13 +1169,15 @@ if (!CombineSPBump && PrologueSaveSize != 0) { MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator()); + while (AArch64InstrInfo::isSEHInstruction(*Pop)) + Pop = std::prev(Pop); // Converting the last ldp to a post-index ldp is valid only if the last // ldp's offset is 0. const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1); // If the offset is 0, convert it to a post-index ldp. 
if (OffsetOp.getImm() == 0) { - convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII, - PrologueSaveSize); + convertCalleeSaveRestoreToSPPrePostIncDec( + MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, false); } else { // If not, make sure to emit an add after the last ldp. // We're doing this by transfering the size to be restored from the @@ -996,15 +1197,20 @@ if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) { ++LastPopI; break; - } else if (CombineSPBump) - fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize()); + } else if (CombineSPBump && !AArch64InstrInfo::isSEHInstruction(*LastPopI)) + fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(), + NeedsWinCFI); } + if (NeedsWinCFI) + BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart)) + .setMIFlag(MachineInstr::FrameDestroy); + // If there is a single SP update, insert it before the ret and we're done. if (CombineSPBump) { emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, - NumBytes + AfterCSRPopSize, TII, - MachineInstr::FrameDestroy); + NumBytes + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI); return; } @@ -1032,7 +1238,8 @@ adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI); emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, - StackRestoreBytes, TII, MachineInstr::FrameDestroy); + StackRestoreBytes, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); if (Done) return; @@ -1046,10 +1253,10 @@ if (MFI.hasVarSizedObjects() || AFI->isStackRealigned()) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP, -AFI->getCalleeSavedStackSize() + 16, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); else if (NumBytes) emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII, - MachineInstr::FrameDestroy); + MachineInstr::FrameDestroy, false, NeedsWinCFI); // This must be placed after the callee-save restore code because that code // 
assumes the SP is at the same location as it was after the callee-save save @@ -1070,7 +1277,8 @@ adaptForLdStOpt(MBB, FirstSPPopI, LastPopI); emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, - AfterCSRPopSize, TII, MachineInstr::FrameDestroy); + AfterCSRPopSize, TII, MachineInstr::FrameDestroy, false, + NeedsWinCFI); } } @@ -1196,6 +1404,20 @@ Attrs.hasAttrSomewhere(Attribute::SwiftError)); } +static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, + bool NeedsWinCFI) { + // If we are generating register pairs for a Windows function that requires + // EH support, then pair consecutive registers only. There are no unwind + // opcodes for saves/restores of non-consectuve register pairs. + // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_frepg_x. + // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling + if (!NeedsWinCFI) + return false; + if (Reg2 == Reg1 + 1) + return false; + return true; +} + namespace { struct RegPairInfo { @@ -1220,6 +1442,8 @@ if (CSI.empty()) return; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); AArch64FunctionInfo *AFI = MF.getInfo(); MachineFrameInfo &MFI = MF.getFrameInfo(); CallingConv::ID CC = MF.getFunction().getCallingConv(); @@ -1233,6 +1457,11 @@ "Odd number of callee-saved regs to spill!"); int Offset = AFI->getCalleeSavedStackSize(); + // On Win64, we don't necessarily pair all registers or all registers except + // possibly one because the unwind opcodes for stack unwinding require that + // only consecutive registers are paired. This flag makes sure that the + // padding below is done only once. + bool FixupDone = false; for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; RPI.Reg1 = CSI[i].getReg(); @@ -1241,11 +1470,11 @@ AArch64::FPR64RegClass.contains(RPI.Reg1)); RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1); - // Add the next reg to the pair if it is in the same register class. 
if (i + 1 < Count) { unsigned NextReg = CSI[i + 1].getReg(); - if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || - (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) + if (((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || + (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) && + !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI)) RPI.Reg2 = NextReg; } @@ -1279,9 +1508,11 @@ RPI.FrameIdx = CSI[i].getFrameIdx(); - if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { + if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired() && + !FixupDone) { // Round up size of non-pair to pair size if we need to pad the // callee-save area to ensure 16-byte alignment. + FixupDone = NeedsWinCFI; Offset -= 16; assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16); MFI.setObjectAlignment(RPI.FrameIdx, 16); @@ -1305,6 +1536,8 @@ const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); DebugLoc DL; SmallVector RegPairs; @@ -1353,24 +1586,48 @@ if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1; dbgs() << ")\n"); + assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) && + "Windows unwdinding requires a consecutive (FP,LR) pair"); + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (!MRI.isReserved(Reg1)) MBB.addLiveIn(Reg1); - if (RPI.isPaired()) { - if (!MRI.isReserved(Reg2)) - MBB.addLiveIn(Reg2); - MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); + + // Windows unwinding codes require that gprs be consecutive if they are paired. 
+ if (NeedsWinCFI) { + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)); MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOStore, 8, 8)); + if (RPI.isPaired()) { + if (!MRI.isReserved(Reg2)) + MBB.addLiveIn(Reg2); + MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), + MachineMemOperand::MOStore, 8, 8)); + } + MIB.addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit + .setMIFlag(MachineInstr::FrameSetup); + InsertSEH(MIB, TII, MachineInstr::FrameSetup); + } else { + if (RPI.isPaired()) { + if (!MRI.isReserved(Reg2)) + MBB.addLiveIn(Reg2); + MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); + MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), MachineMemOperand::MOStore, 8, 8)); + } + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) + .addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit + .setMIFlag(MachineInstr::FrameSetup); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOStore, 8, 8)); } - MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) - .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit - .setMIFlag(MachineInstr::FrameSetup); - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOStore, 8, 8)); } return true; } @@ -1383,6 +1640,8 @@ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); DebugLoc DL; SmallVector RegPairs; + bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && + MF.getFunction().needsUnwindTableEntry(); if (MI != MBB.end()) DL = MI->getDebugLoc(); @@ -1415,19 +1674,37 @@ dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); - if (RPI.isPaired()) 
{ - MIB.addReg(Reg2, getDefRegState(true)); + + if (NeedsWinCFI) { + MIB.addReg(Reg1, getDefRegState(true)); MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), + MachineMemOperand::MOLoad, 8, 8)); + if (RPI.isPaired()) { + MIB.addReg(Reg2, getDefRegState(true)); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), + MachineMemOperand::MOLoad, 8, 8)); + } + MIB.addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit + .setMIFlag(MachineInstr::FrameDestroy); + InsertSEH(MIB, TII, MachineInstr::FrameDestroy); + } else { + if (RPI.isPaired()) { + MIB.addReg(Reg2, getDefRegState(true)); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), + MachineMemOperand::MOLoad, 8, 8)); + } + MIB.addReg(Reg1, getDefRegState(true)) + .addReg(AArch64::SP) + .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit + .setMIFlag(MachineInstr::FrameDestroy); + MIB.addMemOperand(MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), MachineMemOperand::MOLoad, 8, 8)); } - MIB.addReg(Reg1, getDefRegState(true)) - .addReg(AArch64::SP) - .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit - .setMIFlag(MachineInstr::FrameDestroy); - MIB.addMemOperand(MF.getMachineMemOperand( - MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx), - MachineMemOperand::MOLoad, 8, 8)); }; if (ReverseCSRRestoreSeq) Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -259,6 +259,9 @@ /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. 
bool isFalkorShiftExtFast(const MachineInstr &MI) const; + /// Return true if the instructions is a SEH instruciton used for unwinding + /// on Windows. + static bool isSEHInstruction(const MachineInstr &MI); private: /// Sets the offsets on outlined instructions in \p MBB which use SP @@ -286,7 +289,7 @@ const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, - bool SetNZCV = false); + bool SetNZCV = false, bool NeedsWinCFI = false); /// rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the /// FP. Return false if the offset could not be handled directly in MI, and Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1055,6 +1055,32 @@ } } +bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + default: + return false; + case AArch64::SEH_StackAlloc: + case AArch64::SEH_SaveFPLR: + case AArch64::SEH_SaveFPLR_X: + case AArch64::SEH_SaveReg: + case AArch64::SEH_SaveReg_X: + case AArch64::SEH_SaveRegP: + case AArch64::SEH_SaveRegP_X: + case AArch64::SEH_SaveFReg: + case AArch64::SEH_SaveFReg_X: + case AArch64::SEH_SaveFRegP: + case AArch64::SEH_SaveFRegP_X: + case AArch64::SEH_SetFP: + case AArch64::SEH_AddFP: + case AArch64::SEH_Nop: + case AArch64::SEH_PrologEnd: + case AArch64::SEH_EpilogStart: + case AArch64::SEH_EpilogEnd: + return true; + } +} + bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const { @@ -2952,7 +2978,8 @@ MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int Offset, const TargetInstrInfo *TII, - MachineInstr::MIFlag Flag, bool SetNZCV) { + MachineInstr::MIFlag Flag, bool SetNZCV, + bool NeedsWinCFI) { if (DestReg == 
SrcReg && Offset == 0) return; @@ -3007,6 +3034,21 @@ .addImm(Offset) .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)) .setMIFlag(Flag); + + if (NeedsWinCFI) { + if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) || + (SrcReg == AArch64::FP && DestReg == AArch64::SP)) { + if (Offset == 0) + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)). + setMIFlag(Flag); + else + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP)). + addImm(Offset).setMIFlag(Flag); + } else if (DestReg == AArch64::SP) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc)). + addImm(Offset).setMIFlag(Flag); + } + } } MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( Index: lib/Target/AArch64/AArch64RegisterInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64RegisterInfo.cpp +++ lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -42,6 +42,8 @@ const MCPhysReg * AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { assert(MF && "Invalid MachineFunction pointer."); + if (MF->getSubtarget().isTargetWindows()) + return CSR_Win_AArch64_AAPCS_SaveList; if (MF->getFunction().getCallingConv() == CallingConv::GHC) // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around Index: test/CodeGen/AArch64/win64_vararg.ll =================================================================== --- test/CodeGen/AArch64/win64_vararg.ll +++ test/CodeGen/AArch64/win64_vararg.ll @@ -104,7 +104,7 @@ ; CHECK-LABEL: fp ; CHECK: str x21, [sp, #-96]! 
-; CHECK: stp x20, x19, [sp, #16] +; CHECK: stp x19, x20, [sp, #16] ; CHECK: stp x29, x30, [sp, #32] ; CHECK: add x29, sp, #32 ; CHECK: add x8, x29, #24 @@ -125,7 +125,7 @@ ; CHECK: mov x4, xzr ; CHECK: bl __stdio_common_vsprintf ; CHECK: ldp x29, x30, [sp, #32] -; CHECK: ldp x20, x19, [sp, #16] +; CHECK: ldp x19, x20, [sp, #16] ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge ; CHECK: ldr x21, [sp], #96 @@ -151,8 +151,8 @@ ; CHECK-LABEL: vla ; CHECK: str x23, [sp, #-112]! -; CHECK: stp x22, x21, [sp, #16] -; CHECK: stp x20, x19, [sp, #32] +; CHECK: stp x21, x22, [sp, #16] +; CHECK: stp x19, x20, [sp, #32] ; CHECK: stp x29, x30, [sp, #48] ; CHECK: add x29, sp, #48 ; CHECK: add x8, x29, #16 @@ -183,8 +183,8 @@ ; CHECK: mov sp, [[REG2]] ; CHECK: sub sp, x29, #48 ; CHECK: ldp x29, x30, [sp, #48] -; CHECK: ldp x20, x19, [sp, #32] -; CHECK: ldp x22, x21, [sp, #16] +; CHECK: ldp x19, x20, [sp, #32] +; CHECK: ldp x21, x22, [sp, #16] ; CHECK: ldr x23, [sp], #112 ; CHECK: ret define void @vla(i32, i8*, ...) 
local_unnamed_addr { @@ -212,8 +212,9 @@ ; CHECK-LABEL: snprintf ; CHECK: sub sp, sp, #96 -; CHECK: stp x21, x20, [sp, #16] -; CHECK: stp x19, x30, [sp, #32] +; CHECK: str x21, [sp, #16] +; CHECK: stp x19, x20, [sp, #24] +; CHECK: str x30, [sp, #40] ; CHECK: add x8, sp, #56 ; CHECK: mov x19, x2 ; CHECK: mov x20, x1 @@ -231,8 +232,9 @@ ; CHECK: mov x3, x19 ; CHECK: mov x4, xzr ; CHECK: bl __stdio_common_vsprintf -; CHECK: ldp x19, x30, [sp, #32] -; CHECK: ldp x21, x20, [sp, #16] +; CHECK: ldr x30, [sp, #40] +; CHECK: ldp x19, x20, [sp, #24] +; CHECK: ldr x21, [sp, #16] ; CHECK: cmp w0, #0 ; CHECK: csinv w0, w0, wzr, ge ; CHECK: add sp, sp, #96 Index: test/CodeGen/AArch64/wineh-pei.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/wineh-pei.mir @@ -0,0 +1,75 @@ +# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog -stop-after=prologepilog | FileCheck %s + +# Test Win64 unwind opcodes generated by the PEI. + +# CHECK: frame-setup SEH_SaveRegP_X 27, 28, -80 +# CHECK: frame-setup SEH_SaveRegP 25, 26, 16 +# CHECK: frame-setup SEH_SaveRegP 23, 24, 32 +# CHECK: frame-setup SEH_SaveRegP 21, 22, 48 +# CHECK: frame-setup SEH_SaveRegP 19, 20, 64 +# CHECK: frame-setup SEH_PrologEnd +# CHECK: frame-destroy SEH_EpilogStart +# CHECK: frame-destroy SEH_SaveRegP 19, 20, 64 +# CHECK: frame-destroy SEH_SaveRegP 21, 22, 48 +# CHECK: frame-destroy SEH_SaveRegP 23, 24, 32 +# CHECK: frame-destroy SEH_SaveRegP 25, 26, 16 +# CHECK: frame-destroy SEH_SaveRegP_X 27, 28, -80 +# CHECK: frame-destroy SEH_EpilogEnd +--- +name: test +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +frameInfo: + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + $x19 = ADDXrr $x0, killed $x1 + $x20 = ADDXrr $x19, killed $x0 + $x21 = ADDXrr $x20, killed $x19 + $x22 = ADDXrr $x21, killed $x20 + $x23 = ADDXrr $x22, killed $x21 + $x24 = ADDXrr $x23, killed $x22 + $x25 = ADDXrr 
$x24, killed $x23 + $x26 = ADDXrr $x25, killed $x24 + $x27 = ADDXrr $x26, killed $x25 + $x28 = ADDXrr $x27, killed $x26 + $x0 = COPY $x28 + RET_ReallyLR implicit $x0 +... + +# CHECK: frame-setup SEH_SaveRegP_X 27, 28, -64 +# CHECK: frame-setup SEH_SaveRegP 25, 26, 16 +# CHECK: frame-setup SEH_SaveReg 23, 32 +# CHECK: frame-setup SEH_SaveRegP 21, 22, 40 +# CHECK: frame-setup SEH_SaveReg 19, 56 +# CHECK: frame-setup SEH_PrologEnd +--- +name: test2 +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$w0' } +frameInfo: + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: true +body: | + bb.0.entry: + liveins: $x0, $x1 + + $x19 = ADDXrr $x0, killed $x1 + $x21 = ADDXrr $x19, $x19 + $x22 = ADDXrr $x21, killed $x19 + $x23 = ADDXrr $x22, killed $x21 + $x25 = ADDXrr $x23, $x23 + $x26 = ADDXrr $x25, killed $x23 + $x27 = ADDXrr $x26, killed $x25 + $x28 = ADDXrr $x27, killed $x26 + $x0 = COPY $x28 + RET_ReallyLR implicit $x0 +... +