Index: include/llvm/CodeGen/MachineFrameInfo.h =================================================================== --- include/llvm/CodeGen/MachineFrameInfo.h +++ include/llvm/CodeGen/MachineFrameInfo.h @@ -484,6 +484,9 @@ /// int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable); + /// CreateFixedSpillStackObject - Create a spill slot at a fixed location + /// on the stack. Returns an index with a negative value. + int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset); /// isFixedObjectIndex - Returns true if the specified index corresponds to a /// fixed stack object. Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -93,6 +93,17 @@ /// stack pointer. virtual bool isFPCloseToIncomingSP() const { return true; } + /// assignCalleeSavedSpillSlots - Allows target to override spill slot assignment + /// logic. If implemented, assignCalleeSavedSpillSlots() should assign frame slots + /// to all CSI entries and return true. If this method returns false, spill slots + /// will be assigned using generic implementation. + /// assignCalleeSavedSpillSlots() may add, delete or rearrange elements of CSI. + virtual bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const { + return false; + } + /// getCalleeSavedSpillSlots - This method returns a pointer to an array of /// pairs, that contains an entry for each callee saved register that must be /// spilled to a particular stack location if it is spilled. Index: lib/CodeGen/AsmPrinter/Win64Exception.cpp =================================================================== --- lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -78,9 +78,9 @@ if (!shouldEmitPersonality) return; - MCSymbol *GCCHandlerSym = - Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); - Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, + Asm->TM, MMI); + Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -99,15 +99,8 @@ MMI->TidyLandingPads(); if (shouldEmitPersonality) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.PushSection(); Asm->OutStreamer.EmitWin64EHHandlerData(); - Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), - 4); EmitExceptionTable(); Asm->OutStreamer.PopSection(); } Index: lib/CodeGen/MachineFunction.cpp =================================================================== --- lib/CodeGen/MachineFunction.cpp +++ lib/CodeGen/MachineFunction.cpp @@ -457,7 +457,7 @@ /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. 
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); @@ -533,7 +533,7 @@ Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -551,7 +551,7 @@ Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -574,13 +574,28 @@ Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*Alloca*/ nullptr)); return -++NumFixedObjects; } +/// CreateFixedSpillStackObject - Create a spill slot at a fixed location +/// on the stack. Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset) { + unsigned StackAlign = getFrameLowering()->getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr)); + return -++NumFixedObjects; +} BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { @@ -849,10 +864,10 @@ if (isa(A->getType()) || isa(A->getType()) || isa(B->getType()) || isa(B->getType())) return false; - + // For now, only support constants with the same size. uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || + if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) return false; @@ -882,7 +897,7 @@ /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. /// -unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -268,51 +268,55 @@ } } - if (CSI.empty()) - return; // Early exit if no callee saved registers are modified! - - unsigned NumFixedSpillSlots; - const TargetFrameLowering::SpillSlot *FixedSpillSlots = - TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); - - // Now that we know which registers need to be saved and restored, allocate - // stack slots for them. 
- for (std::vector::iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); - - int FrameIdx; - if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { - I->setFrameIdx(FrameIdx); - continue; - } + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } - // Check to see if this physreg must be spilled to a particular stack slot - // on this target. - const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; - while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->Reg != Reg) - ++FixedSlot; - - if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { - // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); - unsigned StackAlign = TFI->getStackAlignment(); - - // We may not be able to satisfy the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. Use the - // min. - Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; - } else { - // Spill it to the stack where we must. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); - } + } else { + // Spill it to the stack where we must. 
+ FrameIdx = MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } - I->setFrameIdx(FrameIdx); + I->setFrameIdx(FrameIdx); + } } MFI->setCalleeSavedInfo(CSI); Index: lib/MC/MCAsmStreamer.cpp =================================================================== --- lib/MC/MCAsmStreamer.cpp +++ lib/MC/MCAsmStreamer.cpp @@ -1134,17 +1134,14 @@ void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) { MCStreamer::EmitWin64EHPushReg(Register); - OS << "\t.seh_pushreg "; - EmitRegisterName(Register); + OS << "\t.seh_pushreg " << Register; EmitEOL(); } void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { MCStreamer::EmitWin64EHSetFrame(Register, Offset); - OS << "\t.seh_setframe "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_setframe " << Register << ", " << Offset; EmitEOL(); } @@ -1158,18 +1155,14 @@ void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) { MCStreamer::EmitWin64EHSaveReg(Register, Offset); - OS << "\t.seh_savereg "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_savereg " << Register << ", " << Offset; EmitEOL(); } void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) { MCStreamer::EmitWin64EHSaveXMM(Register, Offset); - OS << "\t.seh_savexmm "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_savexmm " << Register << ", " << Offset; EmitEOL(); } Index: lib/MC/MCObjectFileInfo.cpp =================================================================== --- lib/MC/MCObjectFileInfo.cpp +++ lib/MC/MCObjectFileInfo.cpp @@ -632,11 +632,17 @@ // though it contains relocatable pointers. In PIC mode, this is probably a // big runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. - LSDASection = - Ctx->getCOFFSection(".gcc_except_table", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + assert(T.isOSWindows() && "COFF is supported only on Windows"); + if (T.getArch() == Triple::x86_64) { + // On Windows 64 with SEH, the LSDA is emitted into the .xdata section + LSDASection = 0; + } else { + LSDASection = + Ctx->getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + } // Debug info. 
COFFDebugSymbolsSection = Index: lib/MC/MCStreamer.cpp =================================================================== --- lib/MC/MCStreamer.cpp +++ lib/MC/MCStreamer.cpp @@ -526,6 +526,8 @@ report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); + if (Offset > 240) + report_fatal_error("Frame offset must be less than or equal to 240!"); MCSymbol *Label = getContext().CreateTempSymbol(); MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset); EmitLabel(Label); Index: lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp =================================================================== --- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -142,8 +142,11 @@ void X86MCAsmInfoMicrosoft::anchor() { } X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) + if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; + PointerSize = 8; + ExceptionsType = ExceptionHandling::Win64; + } AssemblerDialect = AsmWriterFlavor; @@ -157,17 +160,18 @@ void X86MCAsmInfoGNUCOFF::anchor() { } X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { + assert(Triple.isOSWindows() && "COFF is supported only on Windows"); if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PointerSize = 8; + ExceptionsType = ExceptionHandling::Win64; + } else { + ExceptionsType = ExceptionHandling::DwarfCFI; } AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; - UseIntegratedAssembler = true; } Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -27,8 +27,8 @@ : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const; + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
@@ -42,6 +42,10 @@ void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = nullptr) const override; + bool assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -306,8 +307,7 @@ } void X86FrameLowering::emitCalleeSavedFrameMoves( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const { + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); @@ -318,53 +318,11 @@ const std::vector &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - const X86RegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -RegInfo->getSlotSize(); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minimum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. - if (HasFP && FramePtr == Reg) - continue; unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = @@ -396,6 +354,84 @@ /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to /// generate the exception handling frames. 
+
+/*
+  Here's a gist of what gets emitted:
+
+  ; Establish frame pointer, if needed
+  [if needs FP]
+      push %rbp
+      .cfi_def_cfa_offset 16
+      .cfi_offset %rbp, -16
+      .seh_pushreg %rbp
+      mov %rsp, %rbp
+      .cfi_def_cfa_register %rbp
+
+  ; Spill general-purpose registers
+  [for all callee-saved GPRs]
+      pushq %<reg>
+      [if not needs FP]
+         .cfi_def_cfa_offset (offset from RETADDR)
+      .seh_pushreg %<reg>
+
+  ; If the required stack alignment > default stack alignment
+  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+  ; of unknown size in the stack frame.
+  [if stack needs re-alignment]
+      and $MASK, %rsp
+
+  ; Allocate space for locals
+  [if target is Windows and allocated space > 4096 bytes]
+      ; Windows needs special care for allocations larger
+      ; than one page.
+      mov $NNN, %rax
+      call ___chkstk_ms/___chkstk
+      sub %rax, %rsp
+  [else]
+      sub $NNN, %rsp
+
+  [if needs FP]
+      .seh_stackalloc (size of XMM spill slots)
+      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
+  [else]
+      .seh_stackalloc NNN
+
+  ; Spill XMMs
+  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
+  ; they may get spilled on any platform if the current function
+  ; calls @llvm.eh.unwind.init
+  [if needs FP]
+      [for all callee-saved XMM registers]
+          movaps %<xmm reg>, -MMM(%rbp)
+      [for all callee-saved XMM registers]
+          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
+          ; i.e. the offset relative to (%rbp - SEHFrameOffset)
+  [else]
+      [for all callee-saved XMM registers]
+          movaps %<xmm reg>, KKK(%rsp)
+      [for all callee-saved XMM registers]
+          .seh_savexmm %<xmm reg>, KKK
+
+  .seh_endprologue
+
+  [if needs base pointer]
+      mov %rsp, %rbx
+
+  ; Emit CFI info
+  [if needs FP]
+      [for all callee-saved registers]
+          .cfi_offset %<reg>, (offset from %rbp)
+  [else]
+      .cfi_def_cfa_offset (offset from RETADDR)
+      [for all callee-saved registers]
+          .cfi_offset %<reg>, (offset from %rsp)
+
+  Notes:
+  - .seh directives are emitted only for Windows 64 ABI
+  - .cfi directives are emitted for all other ABIs
+  - for 32-bit code, substitute %e?? registers for %r??
+*/
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -406,8 +442,6 @@
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
-    Fn->needsUnwindTableEntry();
   uint64_t MaxAlign = MFI->getMaxAlignment();   // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();     // Number of bytes to allocate.
   bool HasFP = hasFP(MF);
@@ -415,6 +449,8 @@
   bool Is64Bit = STI.is64Bit();
   bool IsLP64 = STI.isTarget64BitLP64();
   bool IsWin64 = STI.isTargetWin64();
+  bool NeedsWin64SEH = IsWin64 && Fn->needsUnwindTableEntry();
+  bool NeedsDwarfCFI = !IsWin64 && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
   bool UseLEA = STI.useLeaForSP();
   unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
@@ -512,7 +548,7 @@
       .addReg(FramePtr, RegState::Kill)
       .setMIFlag(MachineInstr::FrameSetup);
 
-    if (needsFrameMoves) {
+    if (NeedsDwarfCFI) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -530,13 +566,19 @@
         .addCFIIndex(CFIIndex);
     }
+    if (NeedsWin64SEH) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+          .addImm(FramePtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
     // Update EBP with the new base value.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ?
X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); @@ -546,8 +588,8 @@ .addCFIIndex(CFIIndex); } - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); + // Mark the FramePtr as live-in in every block. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); } else { @@ -562,10 +604,10 @@ (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - MBBI->setFlag(MachineInstr::FrameSetup); + unsigned Reg = MBBI->getOperand(0).getReg(); ++MBBI; - if (!HasFP && needsFrameMoves) { + if (!HasFP && NeedsDwarfCFI) { // Mark callee-saved push instruction. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -575,16 +617,16 @@ .addCFIIndex(CFIIndex); StackOffset += stackGrowth; } + + if (NeedsWin64SEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(Reg) + .setMIFlag(MachineInstr::FrameSetup); + } } // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - - // NOTE: We push the registers before realigning the stack, so - // vector callee-saved (xmm) registers may be saved w/o proper - // alignment in this way. However, currently these regs are saved in - // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so - // this shouldn't be a problem. if (RegInfo->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); MachineInstr *MI = @@ -683,23 +725,87 @@ MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } - } else if (NumBytes) + } else if (NumBytes) { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); + } + + int SEHFrameOffset = 0; + if (NeedsWin64SEH) { + if (HasFP) { + // We need to set frame base offset low enough such that all saved + // register offsets would be positive relative to it, but we can't + // just use NumBytes, because .seh_setframe offset must be <=240. + // So we pretend to have only allocated enough space to spill the + // non-volatile registers. + // We don't care about the rest of stack allocation, because unwinder + // will restore SP to (BP - SEHFrameOffset) + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + int offset = MFI->getObjectOffset(Info.getFrameIdx()); + SEHFrameOffset = std::max(SEHFrameOffset, abs(offset)); + } + SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant + + // This only needs to account for XMM spill slots, GPR slots + // are covered by .seh_pushreg's emitted above. 
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize()) + .setMIFlag(MachineInstr::FrameSetup); + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(SEHFrameOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // SP will be the base register for restoring XMMs + if (NumBytes) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } + + // Skip the rest of register spilling code + while (MBBI != MBB.end() && + MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + // Emit SEH info for non-GPRs + if (NeedsWin64SEH) { + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + unsigned Reg = Info.getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); + + int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + .setMIFlag(MachineInstr::FrameSetup); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. + // Update the base pointer with the current stack pointer. unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { + if (( (!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -714,7 +820,7 @@ // Emit DWARF info specifying the offsets of the callee-saved registers. if (PushedRegs) - emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr); + emitCalleeSavedFrameMoves(MBB, MBBI, DL); } } @@ -974,48 +1080,103 @@ return getFrameIndexOffset(MF, FI); } +bool X86FrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + unsigned SlotSize = RegInfo->getSlotSize(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + + unsigned CalleeSavedFrameSize = 0; + int SpillSlotOffset = getOffsetOfLocalArea() + + X86FI->getTCReturnAddrDelta(); + + if (hasFP(MF)) { + // emitPrologue always spills frame register the first thing. + SpillSlotOffset -= SlotSize; + MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + + // Since emitPrologue and emitEpilogue will handle spilling and restoring of + // the frame register, we can delete it from CSI list and not have to worry + // about avoiding it later. + unsigned FPReg = RegInfo->getFrameRegister(MF); + for (unsigned i = 0; i < CSI.size(); ++i) { + if (CSI[i].getReg() == FPReg) { + CSI.erase(CSI.begin() + i); + break; + } + } + } + + // Assign slots for GRPs. It increases frame size. 
+ for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; + + SpillSlotOffset -= SlotSize; + CalleeSavedFrameSize += SlotSize; + + int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + CSI[i-1].setFrameIdx(SlotIndex); + } + + X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); + + // Assign slots for XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + // ensure alignment + SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment(); + // spill into slot + SpillSlotOffset -= RC->getSize(); + int SlotIndex = MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + CSI[i-1].setFrameIdx(SlotIndex); + MFI->ensureMaxAlignment(RC->getAlignment()); + } + + return true; +} + + bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, + MachineBasicBlock::iterator MI, + const std::vector &CSI, const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - DebugLoc DL = MBB.findDebugLoc(MI); MachineFunction &MF = *MBB.getParent(); const X86RegisterInfo *RegInfo = static_cast(MF.getTarget().getRegisterInfo()); - unsigned SlotSize = RegInfo->getSlotSize(); - unsigned FPReg = TRI->getFrameRegister(MF); - unsigned CalleeFrameSize = 0; - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo(); const X86Subtarget &STI = MF.getTarget().getSubtarget(); // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - // Make XMM regs spilled. X86 does not have ability of push/pop XMM. // It can be done by spilling XMMs to stack frame. - // Note that only Win64 ABI might spill XMMs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (X86::GR64RegClass.contains(Reg) || @@ -1024,8 +1185,12 @@ // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RC, TRI); + --MI; + MI->setFlag(MachineInstr::FrameSetup); + ++MI; } return true; @@ -1050,22 +1215,20 @@ if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); + + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); } // POP GPRs. - unsigned FPReg = TRI->getFrameRegister(MF); unsigned Opc = STI.is64Bit() ? 
X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; @@ -1096,22 +1259,6 @@ TailCallReturnAddrDelta - SlotSize, true); } - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - (void)FrameIdx; - } - // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -599,7 +599,8 @@ // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && - !Subtarget->isTargetCygMing()) { + !Subtarget->isTargetCygMing() && + !Subtarget->isTargetWin64()) { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } Index: lib/Target/X86/X86InstrCompiler.td =================================================================== --- lib/Target/X86/X86InstrCompiler.td +++ lib/Target/X86/X86InstrCompiler.td @@ -110,7 +110,7 @@ // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is -// allocated by bumping the stack pointer. Otherwise memory is allocated from +// allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in @@ -197,6 +197,26 @@ } //===----------------------------------------------------------------------===// +// Pseudo instructions used by unwind info. +// +let isPseudo = 1 in { + def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), + "#SEH_PushReg $reg", []>; + def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveReg $reg, $dst", []>; + def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveXMM $reg, $dst", []>; + def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), + "#SEH_StackAlloc $size", []>; + def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), + "#SEH_SetFrame $reg, $offset", []>; + def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), + "#SEH_PushFrame $mode", []>; + def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), + "#SEH_EndPrologue", []>; +} + +//===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. 
// @@ -371,7 +391,7 @@ def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[In64BitMode]>; - + let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)], IIC_REP_STOS>, REP, Index: lib/Target/X86/X86MCInstLower.cpp =================================================================== --- lib/Target/X86/X86MCInstLower.cpp +++ lib/Target/X86/X86MCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -779,6 +780,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); + const X86RegisterInfo *RI = + static_cast(TM.getRegisterInfo()); + switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); @@ -883,6 +887,43 @@ .addReg(X86::R10) .addReg(X86::RAX)); return; + + case X86::SEH_PushReg: + OutStreamer.EmitWin64EHPushReg( + RI->getSEHRegNum(MI->getOperand(0).getImm())); + return; + + case X86::SEH_SaveReg: + OutStreamer.EmitWin64EHSaveReg( + RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_SaveXMM: + OutStreamer.EmitWin64EHSaveXMM( + RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_StackAlloc: + OutStreamer.EmitWin64EHAllocStack( + MI->getOperand(0).getImm()); + return; + + case X86::SEH_SetFrame: + OutStreamer.EmitWin64EHSetFrame( + RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_PushFrame: + OutStreamer.EmitWin64EHPushFrame( + MI->getOperand(0).getImm()); + return; + + case X86::SEH_EndPrologue: + OutStreamer.EmitWin64EHEndProlog(); + return; } MCInst TmpInst; Index: test/CodeGen/X86/2007-05-05-Personality.ll =================================================================== --- test/CodeGen/X86/2007-05-05-Personality.ll +++ test/CodeGen/X86/2007-05-05-Personality.ll @@ -1,12 +1,14 @@ ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN -; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN +; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64 ; LIN: .cfi_personality 0, __gnat_eh_personality ; LIN: .cfi_lsda 0, .Lexception0 ; WIN: .cfi_personality 0, ___gnat_eh_personality ; WIN: .cfi_lsda 0, Lexception0 +; WIN64: .seh_handler __gnat_eh_personality +; WIN64: .seh_handlerdata @error = external global i8 @@ -15,7 +17,7 @@ invoke void @raise() to label %eh_then unwind label %unwind -unwind: ; preds = %entry +unwind: ; preds = %entry %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*) catch i8* @error %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1 Index: test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll =================================================================== --- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll +++ test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll @@ -1,7 +1,7 @@ ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck 
%s ; CHECK: subq $40, %rsp -; CHECK: movaps %xmm8, (%rsp) -; CHECK: movaps %xmm7, 16(%rsp) +; CHECK: movaps %xmm8, 16(%rsp) +; CHECK: movaps %xmm7, (%rsp) define i32 @a() nounwind { entry: Index: test/CodeGen/X86/avx-intel-ocl.ll =================================================================== --- test/CodeGen/X86/avx-intel-ocl.ll +++ test/CodeGen/X86/avx-intel-ocl.ll @@ -7,21 +7,21 @@ declare <16 x float> @func_float16(<16 x float>, <16 x float>) declare i32 @func_int(i32, i32) -; WIN64: testf16_inp +; WIN64-LABEL: testf16_inp ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}} ; WIN64: leaq {{.*}}(%rsp), %rcx ; WIN64: call ; WIN64: ret -; X32: testf16_inp +; X32-LABEL: testf16_inp ; X32: movl %eax, (%esp) ; X32: vaddps {{.*}}, {{%ymm[0-1]}} ; X32: vaddps {{.*}}, {{%ymm[0-1]}} ; X32: call ; X32: ret -; X64: testf16_inp +; X64-LABEL: testf16_inp ; X64: vaddps {{.*}}, {{%ymm[0-1]}} ; X64: vaddps {{.*}}, {{%ymm[0-1]}} ; X64: leaq {{.*}}(%rsp), %rdi @@ -41,14 +41,14 @@ ;test calling conventions - preserved registers ; preserved ymm6-ymm15 -; WIN64: testf16_regs +; WIN64-LABEL: testf16_regs ; WIN64: call ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: ret ; preserved ymm8-ymm15 -; X64: testf16_regs +; X64-LABEL: testf16_regs ; X64: call ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} @@ -65,28 +65,30 @@ } ; test calling conventions - prolog and epilog -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill -; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}} # 32-byte Spill +; WIN64-LABEL: test_prolog_epilog +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill +; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}} # 32-byte Spill ; WIN64: call -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, 
{{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload -; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload - +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload +; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}} # 32-byte Reload + +; X64-LABEL: test_prolog_epilog ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp) ## 32-byte Folded Spill @@ -111,12 +113,14 @@ ; test functions with integer parameters ; pass parameters on stack for 32-bit platform +; X32-LABEL: test_int ; X32: movl {{.*}}, 4(%esp) ; X32: movl {{.*}}, (%esp) ; X32: call ; X32: addl {{.*}}, %eax ; pass parameters in registers for 64-bit platform +; X64-LABEL: test_int ; X64: leal {{.*}}, %edi ; X64: movl {{.*}}, %esi ; X64: call @@ -128,21 +132,21 @@ ret i32 %c } -; WIN64: test_float4 +; WIN64-LABEL: test_float4 ; WIN64-NOT: vzeroupper ; WIN64: call ; WIN64-NOT: vzeroupper ; WIN64: call ; WIN64: ret -; X64: test_float4 +; X64-LABEL: test_float4 ; X64-NOT: vzeroupper ; X64: call ; X64-NOT: vzeroupper ; X64: call ; X64: ret -; X32: test_float4 +; X32-LABEL: test_float4 ; X32: vzeroupper ; X32: call ; X32: vzeroupper Index: test/CodeGen/X86/gcc_except_table.ll =================================================================== --- test/CodeGen/X86/gcc_except_table.ll +++ test/CodeGen/X86/gcc_except_table.ll @@ -13,14 +13,14 @@ ; APPLE: GCC_except_table0: ; APPLE: Lexception0: -; MINGW64: .cfi_startproc -; MINGW64: .cfi_personality 0, __gxx_personality_v0 -; MINGW64: .cfi_lsda 0, .Lexception0 -; MINGW64: .cfi_def_cfa_offset 16 +; MINGW64: .seh_proc +; MINGW64: .seh_handler __gxx_personality_v0 +; MINGW64: .seh_setframe 5, 0 ; MINGW64: callq _Unwind_Resume -; MINGW64: .cfi_endproc +; MINGW64: .seh_handlerdata ; MINGW64: GCC_except_table0: ; MINGW64: Lexception0: +; MINGW64: .seh_endproc ; MINGW32: .cfi_startproc ; MINGW32: .cfi_personality 0, ___gxx_personality_v0 Index: test/CodeGen/X86/win64_eh.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/win64_eh.ll @@ -0,0 +1,170 @@ +; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64 +; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64 + +; Check function without prolog +define void @foo0() uwtable { +entry: + ret void +} +; WIN64-LABEL: foo0: +; WIN64: .seh_proc foo0 +; WIN64: .seh_endprologue +; WIN64: ret +; WIN64: .seh_endproc + +; Checks a small stack allocation +define void @foo1() uwtable { +entry: + %baz = alloca [2000 x i16], align 2 + ret void +} +; WIN64-LABEL: foo1: +; WIN64: .seh_proc foo1 +; WIN64: subq $4000, %rsp +; WIN64: 
.seh_stackalloc 4000 +; WIN64: .seh_endprologue +; WIN64: addq $4000, %rsp +; WIN64: ret +; WIN64: .seh_endproc + +; Checks a stack allocation requiring call to __chkstk/___chkstk_ms +define void @foo2() uwtable { +entry: + %baz = alloca [4000 x i16], align 2 + ret void +} +; WIN64-LABEL: foo2: +; WIN64: .seh_proc foo2 +; WIN64: movabsq $8000, %rax +; WIN64: callq {{__chkstk|___chkstk_ms}} +; WIN64: subq %rax, %rsp +; WIN64: .seh_stackalloc 8000 +; WIN64: .seh_endprologue +; WIN64: addq $8000, %rsp +; WIN64: ret +; WIN64: .seh_endproc + + +; Checks stack push +define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable { +entry: + %a = alloca i32 + %b = alloca i32 + %c = alloca i32 + %d = alloca i32 + %e = alloca i32 + %f = alloca i32 + store i32 %a_arg, i32* %a + store i32 %b_arg, i32* %b + store i32 %c_arg, i32* %c + store i32 %d_arg, i32* %d + store i32 %e_arg, i32* %e + store i32 %f_arg, i32* %f + %tmp = load i32* %a + %tmp1 = mul i32 %tmp, 2 + %tmp2 = load i32* %b + %tmp3 = mul i32 %tmp2, 3 + %tmp4 = add i32 %tmp1, %tmp3 + %tmp5 = load i32* %c + %tmp6 = mul i32 %tmp5, 5 + %tmp7 = add i32 %tmp4, %tmp6 + %tmp8 = load i32* %d + %tmp9 = mul i32 %tmp8, 7 + %tmp10 = add i32 %tmp7, %tmp9 + %tmp11 = load i32* %e + %tmp12 = mul i32 %tmp11, 11 + %tmp13 = add i32 %tmp10, %tmp12 + %tmp14 = load i32* %f + %tmp15 = mul i32 %tmp14, 13 + %tmp16 = add i32 %tmp13, %tmp15 + ret i32 %tmp16 +} +; WIN64-LABEL: foo3: +; WIN64: .seh_proc foo3 +; WIN64: pushq %rsi +; WIN64: .seh_pushreg 6 +; WIN64: subq $24, %rsp +; WIN64: .seh_stackalloc 24 +; WIN64: .seh_endprologue +; WIN64: addq $24, %rsp +; WIN64: popq %rsi +; WIN64: ret +; WIN64: .seh_endproc + + +; Check emission of eh handler and handler data +declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*) +declare void @_d_eh_resume_unwind(i8*) + +declare i32 @bar() + +define i32 @foo4() #0 { +entry: + %step = alloca i32, align 4 + store i32 0, i32* %step + %tmp = load i32* %step + + %tmp1 = invoke i32 @bar() + to label %finally unwind label %landingpad + +finally: + store i32 1, i32* %step + br label %endtryfinally + +landingpad: + %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality + cleanup + %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0 + store i32 2, i32* %step + call void @_d_eh_resume_unwind(i8* %tmp3) + unreachable + +endtryfinally: + %tmp10 = load i32* %step + ret i32 %tmp10 +} +; WIN64-LABEL: foo4: +; WIN64: .seh_proc foo4 +; WIN64: .seh_handler _d_eh_personality, @unwind, @except +; WIN64: subq $56, %rsp +; WIN64: .seh_stackalloc 56 +; WIN64: .seh_endprologue +; WIN64: addq $56, %rsp +; WIN64: ret +; WIN64: .seh_handlerdata +; WIN64: .seh_endproc + + +; Check stack re-alignment and xmm spilling +define void @foo5() uwtable { +entry: + %s = alloca i32, align 64 + call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"() + ret void +} +; WIN64-LABEL: foo5: +; WIN64: .seh_proc foo5 +; WIN64: pushq %rbp +; WIN64: .seh_pushreg 5 +; WIN64: movq %rsp, %rbp +; WIN64: pushq %rdi +; WIN64: .seh_pushreg 7 +; WIN64: pushq %rbx +; WIN64: .seh_pushreg 3 +; WIN64: andq $-64, %rsp +; WIN64: subq $128, %rsp +; WIN64: .seh_stackalloc 48 +; WIN64: .seh_setframe 5, 64 +; WIN64: movaps %xmm7, -32(%rbp) # 16-byte Spill +; WIN64: movaps %xmm6, -48(%rbp) # 16-byte Spill +; WIN64: .seh_savexmm 6, 16 +; WIN64: .seh_savexmm 7, 32 +; WIN64: .seh_endprologue +; WIN64: movaps -48(%rbp), %xmm6 # 16-byte Reload +; WIN64: movaps -32(%rbp), %xmm7 # 16-byte Reload +; WIN64: leaq 
-16(%rbp), %rsp +; WIN64: popq %rbx +; WIN64: popq %rdi +; WIN64: popq %rbp +; WIN64: retq +; WIN64: .seh_endproc Index: test/MC/AsmParser/directive_seh.s =================================================================== --- test/MC/AsmParser/directive_seh.s +++ test/MC/AsmParser/directive_seh.s @@ -3,10 +3,10 @@ # CHECK: .seh_proc func # CHECK: .seh_pushframe @code # CHECK: .seh_stackalloc 24 -# CHECK: .seh_savereg %rbp, 16 -# CHECK: .seh_savexmm %r8, 0 -# CHECK: .seh_pushreg %rbx -# CHECK: .seh_setframe %rbx, 0 +# CHECK: .seh_savereg 6, 16 +# CHECK: .seh_savexmm 8, 0 +# CHECK: .seh_pushreg 3 +# CHECK: .seh_setframe 3, 0 # CHECK: .seh_endprologue # CHECK: .seh_handler __C_specific_handler, @except # CHECK-NOT: .section{{.*}}.xdata
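
Note for reviewers (not part of the patch): below is a minimal sketch of how a target other than X86 might use the new TargetFrameLowering::assignCalleeSavedSpillSlots() hook together with MachineFrameInfo::CreateFixedSpillStackObject(). The ExampleFrameLowering class, the 16-byte stack alignment, and the fixed 8-byte slot size are illustrative assumptions only; a target that keeps the default (returning false) continues to get the generic PrologEpilogInserter slot assignment.

// Sketch only -- hypothetical target, not part of this change.
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetFrameLowering.h"
#include <vector>

namespace {
class ExampleFrameLowering : public llvm::TargetFrameLowering {
public:
  ExampleFrameLowering()
      : TargetFrameLowering(StackGrowsDown, /*StackAl=*/16, /*LAO=*/0) {}

  // Place every callee-saved register in a fixed slot just below the
  // incoming stack pointer, similar to what the X86 override does for GPRs.
  // Returning true tells PrologEpilogInserter that every CSI entry already
  // has a frame index assigned.
  bool assignCalleeSavedSpillSlots(
      llvm::MachineFunction &MF, const llvm::TargetRegisterInfo *TRI,
      std::vector<llvm::CalleeSavedInfo> &CSI) const override {
    llvm::MachineFrameInfo *MFI = MF.getFrameInfo();
    int64_t Offset = getOffsetOfLocalArea();
    for (llvm::CalleeSavedInfo &Info : CSI) {
      Offset -= 8; // assumed slot size for this sketch
      // CreateFixedSpillStackObject is the new helper introduced above;
      // it returns a negative (fixed-object) frame index.
      Info.setFrameIdx(MFI->CreateFixedSpillStackObject(8, Offset));
    }
    return true;
  }

  // Pure-virtual members of TargetFrameLowering (emitPrologue, emitEpilogue,
  // hasFP, ...) are omitted from this sketch; a real target must implement
  // them.
};
} // end anonymous namespace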