Index: include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- include/llvm/CodeGen/MachineFrameInfo.h
+++ include/llvm/CodeGen/MachineFrameInfo.h
@@ -482,7 +482,8 @@
   /// efficiency. By default, fixed objects are immutable. This returns an
   /// index with a negative value.
   ///
-  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
+  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable,
+                        bool isSpillSlot = false);
 
   /// isFixedObjectIndex - Returns true if the specified index corresponds to a
Index: lib/CodeGen/AsmPrinter/Win64Exception.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -78,9 +78,9 @@
   if (!shouldEmitPersonality)
     return;
 
-  MCSymbol *GCCHandlerSym =
-    Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
-  Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+  const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang,
+                                                                Asm->TM, MMI);
+  Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true);
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
                                                 Asm->getFunctionNumber()));
@@ -99,15 +99,8 @@
   MMI->TidyLandingPads();
 
   if (shouldEmitPersonality) {
-    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-    const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
-    const MCSymbol *Sym =
-      TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
     Asm->OutStreamer.PushSection();
     Asm->OutStreamer.EmitWin64EHHandlerData();
-    Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
-                               4);
     EmitExceptionTable();
     Asm->OutStreamer.PopSection();
   }
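
The Win64Exception.cpp change is the visible half of the new scheme: instead of hard-coding `_GCC_specific_handler`, the personality function itself is named in the `.seh_handler` directive, and the LSDA follows `.seh_handlerdata` with no hand-emitted personality pointer. Reduced to the two streamer calls involved (a fragment, not standalone; `TLOF`, `Per`, and the boolean flags are exactly as in the patch, and the `/*Unwind,Except*/` labels are my gloss on the two `true` arguments):

    const MCSymbol *PersHandlerSym =
        TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
    Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, /*Unwind=*/true,
                                        /*Except=*/true);
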
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -457,7 +457,7 @@
 /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
 /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
 /// normal 'L' label is returned.
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, 
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
                                         bool isLinkerPrivate) const {
   const DataLayout *DL = getTarget().getDataLayout();
   assert(JumpTableInfo && "No jump tables");
@@ -533,7 +533,7 @@
   Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                                   !RealignOption,
-                                  Alignment, getFrameLowering()->getStackAlignment()); 
+                                  Alignment, getFrameLowering()->getStackAlignment());
   CreateStackObject(Size, Alignment, true);
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   ensureMaxAlignment(Alignment);
@@ -551,7 +551,7 @@
   Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                                   !RealignOption,
-                                  Alignment, getFrameLowering()->getStackAlignment()); 
+                                  Alignment, getFrameLowering()->getStackAlignment());
   Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
   ensureMaxAlignment(Alignment);
   return (int)Objects.size()-NumFixedObjects-1;
 }
 
 /// CreateFixedObject - Create a new object at a fixed location on the stack.
 /// All fixed objects should be created before other objects are created for
 /// efficiency. By default, fixed objects are immutable. This returns an
 /// index with a negative value.
 ///
 int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
-                                        bool Immutable) {
+                                        bool Immutable, bool isSpillSlot) {
   assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
   // The alignment of the frame index can be determined from its offset from
   // the incoming frame position.  If the frame object is at offset 32 and
@@ -574,10 +574,9 @@
   Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                               !RealignOption,
-                              Align, getFrameLowering()->getStackAlignment()); 
+                              Align, getFrameLowering()->getStackAlignment());
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/   false,
-                                              /*Alloca*/ nullptr));
+                                              isSpillSlot, /*Alloca*/ nullptr));
   return -++NumFixedObjects;
 }
@@ -849,10 +848,10 @@
   if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
       isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
     return false;
-  
+
   // For now, only support constants with the same size.
   uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
-  if (StoreSize != TD->getTypeStoreSize(B->getType()) || 
+  if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
       StoreSize > 128)
     return false;
@@ -882,7 +881,7 @@
 /// an existing one.  User must specify the log2 of the minimum required
 /// alignment for the object.
 ///
-unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, 
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
                                                    unsigned Alignment) {
   assert(Alignment && "Alignment must be specified!");
   if (Alignment > PoolAlignment) PoolAlignment = Alignment;
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -309,7 +309,7 @@
       if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
     } else {
       // Spill it to the stack where we must.
-      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true, true);
     }
 
     I->setFrameIdx(FrameIdx);
@@ -483,6 +483,8 @@
   // callee saved registers.
   if (StackGrowsDown) {
     for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
       // If the stack grows down, we need to add the size to find the lowest
       // address of the object.
       Offset += MFI->getObjectSize(i);
@@ -496,6 +498,8 @@
   } else {
     int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
     for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
       unsigned Align = MFI->getObjectAlignment(i);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
Index: lib/MC/MCObjectFileInfo.cpp
===================================================================
--- lib/MC/MCObjectFileInfo.cpp
+++ lib/MC/MCObjectFileInfo.cpp
@@ -632,11 +632,16 @@
   // though it contains relocatable pointers.  In PIC mode, this is probably a
   // big runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
-  LSDASection =
-    Ctx->getCOFFSection(".gcc_except_table",
-                        COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                        COFF::IMAGE_SCN_MEM_READ,
-                        SectionKind::getReadOnly());
+  if (T.isOSWindows() && T.getArch() == Triple::x86_64) {
+    // On Windows 64 with SEH, the LSDA is emitted into the .xdata section.
+    LSDASection = 0;
+  } else {
+    LSDASection =
+      Ctx->getCOFFSection(".gcc_except_table",
+                          COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                          COFF::IMAGE_SCN_MEM_READ,
+                          SectionKind::getReadOnly());
+  }
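
A null `LSDASection` is the signal consumers use to keep the exception table out of `.gcc_except_table`; on Win64 it rides in `.xdata` behind `.seh_handlerdata` instead. A minimal standalone sketch of the gating predicate (hypothetical helper name; the two `Triple` calls are the same ones the patch uses):

    #include "llvm/ADT/Triple.h"

    // True when the LSDA will be emitted into .xdata via SEH directives
    // rather than into a .gcc_except_table section.
    static bool lsdaLivesInXData(const llvm::Triple &T) {
      return T.isOSWindows() && T.getArch() == llvm::Triple::x86_64;
    }
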
 
   // Debug info.
   COFFDebugSymbolsSection =
Index: lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -142,8 +142,11 @@
 void X86MCAsmInfoMicrosoft::anchor() { }
 
 X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
-  if (Triple.getArch() == Triple::x86_64)
+  if (Triple.getArch() == Triple::x86_64) {
     PrivateGlobalPrefix = ".L";
+    PointerSize = 8;
+    ExceptionsType = ExceptionHandling::Win64;
+  }
 
   AssemblerDialect = AsmWriterFlavor;
@@ -162,12 +165,14 @@
     PointerSize = 8;
   }
 
+  if (Triple.isOSWindows() && Triple.getArch() == Triple::x86_64)
+    ExceptionsType = ExceptionHandling::Win64;
+  else
+    ExceptionsType = ExceptionHandling::DwarfCFI;
+
   AssemblerDialect = AsmWriterFlavor;
 
   TextAlignFillValue = 0x90;
 
-  // Exceptions handling
-  ExceptionsType = ExceptionHandling::DwarfCFI;
-
   UseIntegratedAssembler = true;
 }
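
Both constructors now agree on one rule: x86-64 Windows targets, MSVC and GNU environments alike, get `ExceptionHandling::Win64`, and everything else keeps DWARF CFI. A sketch of that policy as a free function (hypothetical helper; the patch inlines the test in each constructor):

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCAsmInfo.h"

    static llvm::ExceptionHandling::ExceptionsType
    pickExceptionsType(const llvm::Triple &T) {
      if (T.isOSWindows() && T.getArch() == llvm::Triple::x86_64)
        return llvm::ExceptionHandling::Win64;
      return llvm::ExceptionHandling::DwarfCFI;
    }
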
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -313,63 +314,24 @@
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  bool HasFP = hasFP(MF);
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   if (CSI.empty()) return;
 
-  const X86RegisterInfo *RegInfo =
-    static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
-  bool HasFP = hasFP(MF);
-
-  // Calculate amount of bytes used for return address storing.
-  int stackGrowth = -RegInfo->getSlotSize();
-
-  // FIXME: This is dirty hack. The code itself is pretty mess right now.
-  // It should be rewritten from scratch and generalized sometimes.
-
-  // Determine maximum offset (minimum due to stack growth).
-  int64_t MaxOffset = 0;
-  for (std::vector<CalleeSavedInfo>::const_iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I)
-    MaxOffset = std::min(MaxOffset,
-                         MFI->getObjectOffset(I->getFrameIdx()));
-
   // Calculate offsets.
-  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
   for (std::vector<CalleeSavedInfo>::const_iterator
          I = CSI.begin(), E = CSI.end(); I != E; ++I) {
     int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
     unsigned Reg = I->getReg();
-    Offset = MaxOffset - Offset + saveAreaOffset;
-
-    // Don't output a new machine move if we're re-saving the frame
-    // pointer. This happens when the PrologEpilogInserter has inserted an extra
-    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
-    // generates one when frame pointers are used. If we generate a "machine
-    // move" for this extra "PUSH", the linker will lose track of the fact that
-    // the frame pointer should have the value of the first "PUSH" when it's
-    // trying to unwind.
-    //
-    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
-    // another bug. I.e., one where we generate a prolog like this:
-    //
-    //   pushl  %ebp
-    //   movl   %esp, %ebp
-    //   pushl  %ebp
-    //   pushl  %esi
-    //   ...
-    //
-    // The immediate re-push of EBP is unnecessary. At the least, it's an
-    // optimization bug. EBP can be used as a scratch register in certain
-    // cases, but probably not when we have a frame pointer.
+    if (HasFP && FramePtr == Reg)
       continue;
 
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    unsigned CFIIndex =
-        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
-                                                        Offset));
+    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, DwarfReg, Offset));
     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex);
   }
@@ -396,6 +358,75 @@
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
 /// generate the exception handling frames.
+
+// Here's a gist of what gets emitted:
+//
+// ; Establish frame pointer, if needed
+// [if needs FP]
+//     push %rbp
+//     .cfi_def_cfa_offset 16
+//     .cfi_offset %rbp, -16
+//     .seh_pushreg %rbp
+//     mov  %rsp, %rbp
+//     .cfi_def_cfa_register %rbp
+//
+// ; Spill general-purpose registers
+// [for all callee-saved GPRs]
+//     pushq %rXX
+//     [if not needs FP]
+//       .cfi_def_cfa_offset NNN
+//     .seh_pushreg %rXX
+//
+// ; If the required stack alignment > default stack alignment
+// ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+// ; of unknown size in the stack frame.
+// [if stack needs re-alignment]
+//     and $MASK, %rsp
+//
+// ; Allocate space for locals
+// [if target is Windows and allocated space > 4096 bytes]
+//     ; Windows needs special care for allocations larger
+//     ; than one page.
+//     mov $NNN, %rax
+//     call ___chkstk_ms/___chkstk
+//     sub %rax, %rsp
+// [else]
+//     sub $NNN, %rsp
+//
+// .cfi_def_cfa_offset MMM
+// .seh_stackalloc NNN
+//
+// [if needs FP]
+//     .seh_setframe %rbp, KKK
+//
+// ; Currently only Win64 spills non-GPRs
+// [for all callee-saved XMM registers]
+//     [if needs FP]
+//       movaps %xmmXX, -NNN(%rbp)
+//     [else]
+//       movaps %xmmXX, NNN(%rsp)
+//
+// [for all callee-saved XMM registers]
+//     .seh_savexmm %xmmXX, NNN
+//
+// .seh_endprologue
+//
+// [if needs base pointer]
+//     mov %rsp, %rbx
+//
+// ; Emit CFI info for GPRs
+// [if needs FP]
+//   [for all callee-saved registers]
+//     .cfi_offset %rXX, NNN
+//
+// Notes:
+// - .seh directives are emitted only for Windows 64 ABI
+// - .cfi directives are emitted for all other ABIs
+// - for 32-bit code, substitute %eXX registers for %rXX
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -406,8 +437,6 @@
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
-    Fn->needsUnwindTableEntry();
   uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
   bool HasFP = hasFP(MF);
@@ -415,6 +444,8 @@
   bool Is64Bit = STI.is64Bit();
   bool IsLP64 = STI.isTarget64BitLP64();
   bool IsWin64 = STI.isTargetWin64();
+  bool NeedsWin64SEH = IsWin64 && Fn->needsUnwindTableEntry();
+  bool NeedsDwarfCFI = !IsWin64 && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
   bool UseLEA = STI.useLeaForSP();
   unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
@@ -512,7 +543,7 @@
           .addReg(FramePtr, RegState::Kill)
           .setMIFlag(MachineInstr::FrameSetup);
 
-    if (needsFrameMoves) {
+    if (NeedsDwarfCFI) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -530,13 +561,19 @@
           .addCFIIndex(CFIIndex);
     }
 
+    if (NeedsWin64SEH) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+          .addImm(FramePtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
     // Update EBP with the new base value.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             FramePtr)
         .addReg(StackPtr)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (needsFrameMoves) {
+    if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
       unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
@@ -562,10 +599,10 @@
          (MBBI->getOpcode() == X86::PUSH32r ||
           MBBI->getOpcode() == X86::PUSH64r)) {
     PushedRegs = true;
-    MBBI->setFlag(MachineInstr::FrameSetup);
+    unsigned Reg = MBBI->getOperand(0).getReg();
     ++MBBI;
 
-    if (!HasFP && needsFrameMoves) {
+    if (!HasFP && NeedsDwarfCFI) {
       // Mark callee-saved push instruction.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -575,6 +612,12 @@
           .addCFIIndex(CFIIndex);
       StackOffset += stackGrowth;
     }
+
+    if (NeedsWin64SEH) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+          .addImm(Reg)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
   }
 
   // Realign stack after we pushed callee-saved registers (so that we'll be
@@ -683,23 +726,90 @@
       MI->setFlag(MachineInstr::FrameSetup);
       MBB.insert(MBBI, MI);
     }
-  } else if (NumBytes)
+  } else if (NumBytes) {
     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
                  UseLEA, TII, *RegInfo);
+  }
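
Note how each callee-save push gets its `SEH_PushReg` planted immediately after the push itself, so the directive is attached to the right instruction when printed; the pseudo is flagged `FrameSetup` so the skip loop further down can step over the whole prologue run. The recurring pattern (fragment repeated from the code above):

    if (NeedsWin64SEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(Reg) // the register id travels as an immediate operand
          .setMIFlag(MachineInstr::FrameSetup);
    }
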
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  int SEHFrameOffset = 0;
+  if (NeedsWin64SEH) {
+    if (HasFP) {
+      // We need to set frame base offset low enough such that all saved
+      // register offsets would be positive relative to it, but we can't
+      // just use NumBytes, because .seh_setframe offset must be <= 240.
+      // So we pretend to have only allocated enough space to spill the
+      // non-volatile registers.
+
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        int offset = MFI->getObjectOffset(CSI[i].getFrameIdx());
+        SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+      }
+      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+          .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize())
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+          .addImm(FramePtr)
+          .addImm(SEHFrameOffset)
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      // Don't care about the rest of stack allocation, because unwinder
+      // will restore SP to (BP - LastSpillSlotOffset)
+    } else {
+      // SP will be the base register for restoring XMMs
+      if (NumBytes) {
+        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+            .addImm(NumBytes)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    }
+  }
+
+  // Skip the rest of the register spilling code
+  while (MBBI != MBB.end() &&
+         MBBI->getFlag(MachineInstr::FrameSetup)) {
+    ++MBBI;
+  }
+
+  // Emit SEH info for non-GPRs
+  if (NeedsWin64SEH) {
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      if (X86::GR64RegClass.contains(Reg) ||
+          X86::GR32RegClass.contains(Reg))
+        continue;
+      // Win64 should spill only GPRs and XMMs
+      assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
+
+      int Offset = getFrameIndexOffset(MF, CSI[i].getFrameIdx());
+      Offset += SEHFrameOffset;
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+          .addImm(Reg)
+          .addImm(Offset)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // If we need a base pointer, set it up here. It's whatever the value
   // of the stack pointer is at this point. Any variable size objects
   // will be allocated after this, so we can still use the base pointer
   // to reference locals.
   if (RegInfo->hasBasePointer(MF)) {
-    // Update the frame pointer with the current stack pointer.
+    // Update the base pointer with the current stack pointer.
     unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
     BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
         .addReg(StackPtr)
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+  if (( (!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
     // Mark end of stack pointer adjustment.
     if (!HasFP && NumBytes) {
       // Define the current CFA rule to use the provided offset.
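
The `SEHFrameOffset` arithmetic above deserves a worked example. Spill-slot offsets are negative multiples of 8, so `x += x % 16` is a correct round-up to 16 here (the remainder is only ever 0 or 8). Using the numbers the foo5 test at the end of this patch ends up with, GPR slots at -24 and -32, 16-byte XMM slots at -48 and -64, and a CalleeSavedFrameSize of 16, a standalone sketch:

    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const int Offsets[] = {-24, -32, -48, -64}; // CSR slot offsets (cf. foo5)
      const int CalleeSavedFrameSize = 16;        // two pushed GPRs

      int SEHFrameOffset = 0;
      for (int O : Offsets)
        SEHFrameOffset = std::max(SEHFrameOffset, std::abs(O)); // 64
      SEHFrameOffset += SEHFrameOffset % 16; // still 64; 40 would become 48

      // Matches the test checks: .seh_stackalloc 48 and .seh_setframe 5, 64
      std::printf(".seh_stackalloc %d\n", SEHFrameOffset - CalleeSavedFrameSize);
      std::printf(".seh_setframe 5, %d\n", SEHFrameOffset); // 5 = SEH rbp
      return 0;
    }

Only the spill area is reported to `.seh_stackalloc` in the frame-pointer case; the unwinder recovers SP from the frame register, so the remaining allocation does not need to be described.
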
@@ -797,8 +907,8 @@
     MachineBasicBlock::iterator PI = std::prev(MBBI);
     unsigned Opc = PI->getOpcode();
 
-    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
-        !PI->isTerminator())
+    if (Opc != X86::POP32r && Opc != X86::POP64r &&
+        Opc != X86::DBG_VALUE && !PI->isTerminator())
       break;
 
     --MBBI;
@@ -975,43 +1085,57 @@
 }
 
 bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator MI,
-                                          const std::vector<CalleeSavedInfo> &CSI,
+                                                 MachineBasicBlock::iterator MI,
+                                                 const std::vector<CalleeSavedInfo> &CSI_,
                                           const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
+  std::vector<CalleeSavedInfo> &CSI = const_cast<std::vector<CalleeSavedInfo> &>(CSI_);
 
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
 
   const X86RegisterInfo *RegInfo =
     static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FPReg = TRI->getFrameRegister(MF);
-  unsigned CalleeFrameSize = 0;
-
+  bool HasFP = hasFP(MF);
 
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
 
+  unsigned CalleeSavedFrameSize = 0;
+  int SpillSlotOffset = getOffsetOfLocalArea() +
+                        X86FI->getTCReturnAddrDelta()
+                        - (HasFP ? 1 : 0) * SlotSize;
+
   // Push GPRs. It increases frame size.
   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+
     if (!X86::GR64RegClass.contains(Reg) &&
         !X86::GR32RegClass.contains(Reg))
       continue;
+
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    if (Reg == FPReg)
+    if (Reg == FPReg) {
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
-    CalleeFrameSize += SlotSize;
+    }
+
+    SpillSlotOffset -= SlotSize;
+    CalleeSavedFrameSize += SlotSize;
+
+    int SlotIndex = MFI->CreateFixedObject(SlotSize, SpillSlotOffset, true, true);
+    MFI->RemoveStackObject(CSI[i-1].getFrameIdx());
+    CSI[i-1].setFrameIdx(SlotIndex);
+
     BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
 
   // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
   // It can be done by spilling XMMs to stack frame.
@@ -1024,8 +1148,19 @@
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
-                            RC, TRI);
+
+    // ensure alignment
+    SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+    // spill into slot
+    SpillSlotOffset -= RC->getSize();
+
+    int SlotIndex = MFI->CreateFixedObject(RC->getSize(), SpillSlotOffset, true, true);
+    MFI->RemoveStackObject(CSI[i-1].getFrameIdx());
+    CSI[i-1].setFrameIdx(SlotIndex);
+
+    TII.storeRegToStackSlot(MBB, MI, Reg, true, SlotIndex, RC, TRI);
+    --MI;
+    MI->setFlag(MachineInstr::FrameSetup);
+    ++MI;
   }
 
   return true;
 }
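
This is the heart of the spill rewrite: rather than letting PEI assign slots later, each callee-saved register now claims a fixed object at an offset already known here, which is what allows the prologue code above to emit exact `.seh_setframe`/`.seh_savexmm` numbers. Per register, the bookkeeping reduces to (fragment; all names as in the patch):

    // Reserve an immutable fixed spill slot at a known SP-relative offset,
    // retire the slot PEI had created, and point the CalleeSavedInfo at the
    // replacement so the restore path finds it.
    SpillSlotOffset -= SlotSize;
    int SlotIndex = MFI->CreateFixedObject(SlotSize, SpillSlotOffset,
                                           /*Immutable=*/true,
                                           /*isSpillSlot=*/true);
    MFI->RemoveStackObject(CSI[i - 1].getFrameIdx());
    CSI[i - 1].setFrameIdx(SlotIndex);

The dead objects left behind by `RemoveStackObject` are what the new `isDeadObjectIndex` guards in PrologEpilogInserter.cpp skip.
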
@@ -1043,20 +1178,21 @@
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  unsigned FPReg = TRI->getFrameRegister(MF);
 
-  // Reload XMMs from stack frame.
+  // Reload non-GPRs
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
     if (X86::GR64RegClass.contains(Reg) ||
         X86::GR32RegClass.contains(Reg))
       continue;
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                             RC, TRI);
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                             RC, TRI);
   }
 
   // POP GPRs.
-  unsigned FPReg = TRI->getFrameRegister(MF);
   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
@@ -1106,7 +1242,7 @@
                   -(int)SlotSize +
                   TFI.getOffsetOfLocalArea() +
                   TailCallReturnAddrDelta,
-                  true);
+                  true, true);
     assert(FrameIdx == MFI->getObjectIndexBegin() &&
            "Slot for EBP register must be last in order to be found!");
     (void)FrameIdx;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -599,7 +599,8 @@
   // FIXME - use subtarget debug flags
   if (!Subtarget->isTargetDarwin() &&
       !Subtarget->isTargetELF() &&
-      !Subtarget->isTargetCygMing()) {
+      !Subtarget->isTargetCygMing() &&
+      !Subtarget->isTargetWin64()) {
     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
   }
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -110,7 +110,7 @@
 
 // When using segmented stacks these are lowered into instructions which first
 // check if the current stacklet has enough free memory. If it does, memory is
-// allocated by bumping the stack pointer. Otherwise memory is allocated from 
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
 // the heap.
 
 let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
@@ -197,6 +197,26 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Pseudo instructions used by unwind info.
+//
+let isPseudo = 1 in {
+  def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
+                      "#SEH_PushReg $reg", []>;
+  def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+                      "#SEH_SaveReg $reg, $dst", []>;
+  def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+                      "#SEH_SaveXMM $reg, $dst", []>;
+  def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
+                         "#SEH_StackAlloc $size", []>;
+  def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
+                       "#SEH_SetFrame $reg, $offset", []>;
+  def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
+                        "#SEH_PushFrame $mode", []>;
+  def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
+                          "#SEH_EndPrologue", []>;
+}
+
+//===----------------------------------------------------------------------===//
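
On the printing side, each of these pseudos maps one-to-one onto a Win64 EH streamer call, with the register operand translated from LLVM's numbering into the SEH ordinal space first. A hypothetical free-function wrapper around the simplest case, mirroring what X86MCInstLower.cpp does below inside its opcode switch:

    // Sketch: lower a SEH_PushReg pseudo to its .seh_pushreg directive.
    static void lowerSEHPushReg(llvm::MCStreamer &OutStreamer,
                                const llvm::X86RegisterInfo &RI,
                                const llvm::MachineInstr &MI) {
      OutStreamer.EmitWin64EHPushReg(
          RI.getSEHRegNum(MI.getOperand(0).getImm()));
    }
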
// Pseudo instructions used by segmented stacks.
//
@@ -371,7 +391,7 @@
 def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
                      [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
                      Requires<[In64BitMode]>;
- 
+
 let Uses = [RAX,RCX,RDI] in
 def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
                       [(X86rep_stos i64)], IIC_REP_STOS>, REP,
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "llvm/ADT/SmallString.h"
@@ -20,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -779,6 +781,9 @@
 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   X86MCInstLower MCInstLowering(*MF, *this);
 
+  const X86RegisterInfo *RI =
+      static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+
   switch (MI->getOpcode()) {
   case TargetOpcode::DBG_VALUE:
     llvm_unreachable("Should be handled target independently");
@@ -883,6 +888,43 @@
                                 .addReg(X86::R10)
                                 .addReg(X86::RAX));
     return;
+
+  case X86::SEH_PushReg:
+    OutStreamer.EmitWin64EHPushReg(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()));
+    return;
+
+  case X86::SEH_SaveReg:
+    OutStreamer.EmitWin64EHSaveReg(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_SaveXMM:
+    OutStreamer.EmitWin64EHSaveXMM(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_StackAlloc:
+    OutStreamer.EmitWin64EHAllocStack(
+        MI->getOperand(0).getImm());
+    return;
+
+  case X86::SEH_SetFrame:
+    OutStreamer.EmitWin64EHSetFrame(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_PushFrame:
+    OutStreamer.EmitWin64EHPushFrame(
+        MI->getOperand(0).getImm());
+    return;
+
+  case X86::SEH_EndPrologue:
+    OutStreamer.EmitWin64EHEndProlog();
+    return;
   }
 
   MCInst TmpInst;
Index: test/CodeGen/X86/2007-05-05-Personality.ll
===================================================================
--- test/CodeGen/X86/2007-05-05-Personality.ll
+++ test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,12 +1,14 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN
 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN
 ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64
 
 ; LIN: .cfi_personality 0, __gnat_eh_personality
 ; LIN: .cfi_lsda 0, .Lexception0
 ; WIN: .cfi_personality 0, ___gnat_eh_personality
 ; WIN: .cfi_lsda 0, Lexception0
+; WIN64: .seh_handler __gnat_eh_personality
+; WIN64: .seh_handlerdata
 
 @error = external global i8
 
@@ -15,7 +17,7 @@
   invoke void @raise()
           to label %eh_then unwind label %unwind
 
-unwind:                                           ; preds = %entry
+unwind:                                          ; preds = %entry
   %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
             catch i8* @error
   %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
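
For reading the tests that follow: the small integers in `.seh_pushreg` and `.seh_setframe` lines are Win64 unwind ordinals, not DWARF numbers. The authoritative mapping is `getSEHRegNum`, but the low eight GPRs follow the architectural register encoding, which a standalone table makes easy to eyeball (R8 through R15 continue as 8 through 15):

    #include <cstdio>

    // Win64 SEH ordinals for the classic GPRs; e.g. rbx -> 3, rbp -> 5,
    // rsi -> 6, rdi -> 7, matching the .seh_pushreg checks below.
    static const char *const SEHGPRs[8] = {"rax", "rcx", "rdx", "rbx",
                                           "rsp", "rbp", "rsi", "rdi"};

    int main() {
      for (int i = 0; i < 8; ++i)
        std::printf(".seh_pushreg %%%s = %d\n", SEHGPRs[i], i);
      return 0;
    }
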
Index: test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
===================================================================
--- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq $40, %rsp
-; CHECK: movaps %xmm8, (%rsp)
-; CHECK: movaps %xmm7, 16(%rsp)
+; CHECK: movaps %xmm8, 16(%rsp)
+; CHECK: movaps %xmm7, (%rsp)
 
 define i32 @a() nounwind {
 entry:
Index: test/CodeGen/X86/avx-intel-ocl.ll
===================================================================
--- test/CodeGen/X86/avx-intel-ocl.ll
+++ test/CodeGen/X86/avx-intel-ocl.ll
@@ -7,21 +7,21 @@
 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
 declare i32 @func_int(i32, i32)
 
-; WIN64: testf16_inp
+; WIN64-LABEL: testf16_inp
 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; WIN64: leaq {{.*}}(%rsp), %rcx
 ; WIN64: call
 ; WIN64: ret
 
-; X32: testf16_inp
+; X32-LABEL: testf16_inp
 ; X32: movl %eax, (%esp)
 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X32: call
 ; X32: ret
 
-; X64: testf16_inp
+; X64-LABEL: testf16_inp
 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X64: leaq {{.*}}(%rsp), %rdi
@@ -41,14 +41,14 @@
 ;test calling conventions - preserved registers
 ; preserved ymm6-ymm15
-; WIN64: testf16_regs
+; WIN64-LABEL: testf16_regs
 ; WIN64: call
 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: ret
 
 ; preserved ymm8-ymm15
-; X64: testf16_regs
+; X64-LABEL: testf16_regs
 ; X64: call
 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
@@ -65,28 +65,30 @@
 }
 
 ; test calling conventions - prolog and epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64-LABEL: test_prolog_epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
 ; WIN64: call
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+
+; X64-LABEL: test_prolog_epilog
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
@@ -111,12 +113,14 @@
 ; test functions with integer parameters
 ; pass parameters on stack for 32-bit platform
+; X32-LABEL: test_int
 ; X32: movl {{.*}}, 4(%esp)
 ; X32: movl {{.*}}, (%esp)
 ; X32: call
 ; X32: addl {{.*}}, %eax
 
 ; pass parameters in registers for 64-bit platform
+; X64-LABEL: test_int
 ; X64: leal {{.*}}, %edi
 ; X64: movl {{.*}}, %esi
 ; X64: call
@@ -128,21 +132,21 @@
   ret i32 %c
 }
 
-; WIN64: test_float4
+; WIN64-LABEL: test_float4
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64: ret
 
-; X64: test_float4
+; X64-LABEL: test_float4
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64: ret
 
-; X32: test_float4
+; X32-LABEL: test_float4
 ; X32: vzeroupper
 ; X32: call
 ; X32: vzeroupper
Index: test/CodeGen/X86/gcc_except_table.ll
===================================================================
--- test/CodeGen/X86/gcc_except_table.ll
+++ test/CodeGen/X86/gcc_except_table.ll
@@ -13,14 +13,14 @@
 ; APPLE: GCC_except_table0:
 ; APPLE: Lexception0:
 
-; MINGW64: .cfi_startproc
-; MINGW64: .cfi_personality 0, __gxx_personality_v0
-; MINGW64: .cfi_lsda 0, .Lexception0
-; MINGW64: .cfi_def_cfa_offset 16
+; MINGW64: .seh_proc
+; MINGW64: .seh_handler __gxx_personality_v0
+; MINGW64: .seh_setframe 5, 0
 ; MINGW64: callq _Unwind_Resume
-; MINGW64: .cfi_endproc
+; MINGW64: .seh_handlerdata
 ; MINGW64: GCC_except_table0:
 ; MINGW64: Lexception0:
+; MINGW64: .seh_endproc
 
 ; MINGW32: .cfi_startproc
 ; MINGW32: .cfi_personality 0, ___gxx_personality_v0
Index: test/CodeGen/X86/win64_eh.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_eh.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+
+; Check function without prolog
+define void @foo0() uwtable {
+entry:
+  ret void
+}
+; WIN64-LABEL: foo0:
+; WIN64: .seh_proc foo0
+; WIN64: .seh_endprologue
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a small stack allocation
+define void @foo1() uwtable {
+entry:
+  %baz = alloca [2000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo1:
+; WIN64: .seh_proc foo1
+; WIN64: subq $4000, %rsp
+; WIN64: .seh_stackalloc 4000
+; WIN64: .seh_endprologue
+; WIN64: addq $4000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a stack allocation requiring call to __chkstk/___chkstk_ms
+define void @foo2() uwtable {
+entry:
+  %baz = alloca [4000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo2:
+; WIN64: .seh_proc foo2
+; WIN64: movabsq $8000, %rax
+; WIN64: callq {{__chkstk|___chkstk_ms}}
+; WIN64: subq %rax, %rsp
+; WIN64: .seh_stackalloc 8000
+; WIN64: .seh_endprologue
+; WIN64: addq $8000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Checks stack push
+define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
+entry:
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  %d = alloca i32
+  %e = alloca i32
+  %f = alloca i32
+  store i32 %a_arg, i32* %a
+  store i32 %b_arg, i32* %b
+  store i32 %c_arg, i32* %c
+  store i32 %d_arg, i32* %d
+  store i32 %e_arg, i32* %e
+  store i32 %f_arg, i32* %f
+  %tmp = load i32* %a
+  %tmp1 = mul i32 %tmp, 2
+  %tmp2 = load i32* %b
+  %tmp3 = mul i32 %tmp2, 3
+  %tmp4 = add i32 %tmp1, %tmp3
+  %tmp5 = load i32* %c
+  %tmp6 = mul i32 %tmp5, 5
+  %tmp7 = add i32 %tmp4, %tmp6
+  %tmp8 = load i32* %d
+  %tmp9 = mul i32 %tmp8, 7
+  %tmp10 = add i32 %tmp7, %tmp9
+  %tmp11 = load i32* %e
+  %tmp12 = mul i32 %tmp11, 11
+  %tmp13 = add i32 %tmp10, %tmp12
+  %tmp14 = load i32* %f
+  %tmp15 = mul i32 %tmp14, 13
+  %tmp16 = add i32 %tmp13, %tmp15
+  ret i32 %tmp16
+}
+; WIN64-LABEL: foo3:
+; WIN64: .seh_proc foo3
+; WIN64: pushq %rsi
+; WIN64: .seh_pushreg 6
+; WIN64: subq $24, %rsp
+; WIN64: .seh_stackalloc 24
+; WIN64: .seh_endprologue
+; WIN64: addq $24, %rsp
+; WIN64: popq %rsi
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Check emission of eh handler and handler data
+declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
+declare void @_d_eh_resume_unwind(i8*)
+
+declare i32 @bar()
+
+define i32 @foo4() #0 {
+entry:
+  %step = alloca i32, align 4
+  store i32 0, i32* %step
+  %tmp = load i32* %step
+
+  %tmp1 = invoke i32 @bar()
+          to label %finally unwind label %landingpad
+
+finally:
+  store i32 1, i32* %step
+  br label %endtryfinally
+
+landingpad:
+  %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
+          cleanup
+  %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
+  store i32 2, i32* %step
+  call void @_d_eh_resume_unwind(i8* %tmp3)
+  unreachable
+
+endtryfinally:
+  %tmp10 = load i32* %step
+  ret i32 %tmp10
+}
+; WIN64-LABEL: foo4:
+; WIN64: .seh_proc foo4
+; WIN64: .seh_handler _d_eh_personality, @unwind, @except
+; WIN64: subq $56, %rsp
+; WIN64: .seh_stackalloc 56
+; WIN64: .seh_endprologue
+; WIN64: addq $56, %rsp
+; WIN64: ret
+; WIN64: .seh_handlerdata
+; WIN64: .seh_endproc
+
+
+; Check stack re-alignment and xmm spilling
+define void @foo5() uwtable {
+entry:
+  %s = alloca i32, align 64
+  call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
+  ret void
+}
+; WIN64-LABEL: foo5:
+; WIN64: .seh_proc foo5
+; WIN64: pushq %rbp
+; WIN64: .seh_pushreg 5
+; WIN64: movq %rsp, %rbp
+; WIN64: pushq %rdi
+; WIN64: .seh_pushreg 7
+; WIN64: pushq %rbx
+; WIN64: .seh_pushreg 3
+; WIN64: andq $-64, %rsp
+; WIN64: subq $128, %rsp
+; WIN64: .seh_stackalloc 48
+; WIN64: .seh_setframe 5, 64
+; WIN64: movaps %xmm7, -32(%rbp)  # 16-byte Spill
+; WIN64: movaps %xmm6, -48(%rbp)  # 16-byte Spill
+; WIN64: .seh_savexmm 6, 16
+; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_endprologue
+; WIN64: movaps -48(%rbp), %xmm6  # 16-byte Reload
+; WIN64: movaps -32(%rbp), %xmm7  # 16-byte Reload
+; WIN64: leaq -16(%rbp), %rsp
+; WIN64: popq %rbx
+; WIN64: popq %rdi
+; WIN64: popq %rbp
+; WIN64: retq
+; WIN64: .seh_endproc