Index: include/llvm/Target/TargetFrameLowering.h =================================================================== --- include/llvm/Target/TargetFrameLowering.h +++ include/llvm/Target/TargetFrameLowering.h @@ -129,13 +129,17 @@ /// the assembly prologue to explicitly handle the stack. virtual void adjustForHiPEPrologue(MachineFunction &MF) const { } - /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee - /// saved registers and returns true if it isn't possible / profitable to do - /// so by issuing a series of store instructions via - /// storeRegToStackSlot(). Returns false otherwise. + /// spillCalleeSavedRegisters - Issues instruction(s) to spill all + /// callee saved registers and returns true if it isn't possible / + /// profitable to do so by issuing a series of store instructions + /// via storeRegToStackSlot(). Populates CSRSlots with [frame + /// index, offset] pairs for the frame indices that had to be + /// eagerly assigned stack slots by the instruction spilling code + /// (e.g. by x86 push instructions). Returns false otherwise. virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { return false; } Index: lib/CodeGen/PrologEpilogInserter.h =================================================================== --- lib/CodeGen/PrologEpilogInserter.h +++ lib/CodeGen/PrologEpilogInserter.h @@ -51,6 +51,12 @@ // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; + // CSRSlots - Maps the frame indices that have already been + // assigned an offset by spillCalleeSavedRegisters to the offset + // they have been assigned. The offset is from the value of the + // stack pointer before executing the CSR spilling code. + DenseMap CSRSlots; + // Entry and return blocks of the current function. 
MachineBasicBlock* EntryBlock; SmallVector ReturnBlocks; Index: lib/CodeGen/PrologEpilogInserter.cpp =================================================================== --- lib/CodeGen/PrologEpilogInserter.cpp +++ lib/CodeGen/PrologEpilogInserter.cpp @@ -249,6 +249,7 @@ // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; MaxCSFrameIndex = 0; + CSRSlots.clear(); // Early exit for targets which have no callee saved registers. if (!CSRegs || CSRegs[0] == 0) @@ -339,7 +340,7 @@ // Spill using target interface. I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, CSRSlots, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. @@ -479,10 +480,36 @@ if (FixedOff > Offset) Offset = FixedOff; } - // First assign frame offsets to stack objects that are used to spill - // callee saved registers. + // Reify the stack offsets that were implicitly assigned to CSR + // frame indices. + const int CSRSlotsStart = Offset; + for (auto I = CSRSlots.begin(), E = CSRSlots.end(); I != E; ++I) { + int thisFrameIdx = I->first; + int thisOffset = I->second; + + int64_t nextOffset = 0; + + if (StackGrowsDown) { + thisOffset = CSRSlotsStart + (-thisOffset); + nextOffset = thisOffset; + MFI->setObjectOffset(thisFrameIdx, -thisOffset); + } else { + thisOffset = CSRSlotsStart + thisOffset; + nextOffset = thisOffset + MFI->getObjectSize(thisFrameIdx); + MFI->setObjectOffset(thisFrameIdx, thisOffset); + } + + Offset = std::max(Offset, nextOffset); + } + + // Offset now points to the end of the block of offsets implicitly + // assigned by the target specific spill code. Here we deal with + // the frame indices left over. 
+ if (StackGrowsDown) { for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + if (CSRSlots.count(i)) { continue; } // already assigned a slot above + // If the stack grows down, we need to add the size to find the lowest // address of the object. Offset += MFI->getObjectSize(i); @@ -496,6 +523,8 @@ } else { int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; for (int i = MaxCSFI; i >= MinCSFI ; --i) { + if (CSRSlots.count(i)) { continue; } // already assigned a slot above + unsigned Align = MFI->getObjectAlignment(i); // Adjust to alignment boundary Offset = (Offset+Align-1)/Align*Align; Index: lib/Target/AArch64/AArch64FrameLowering.h =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.h +++ lib/Target/AArch64/AArch64FrameLowering.h @@ -53,6 +53,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -614,7 +614,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, - const TargetRegisterInfo *TRI) const { + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); unsigned Count = CSI.size(); Index: lib/Target/ARM/ARMFrameLowering.h =================================================================== --- lib/Target/ARM/ARMFrameLowering.h +++ lib/Target/ARM/ARMFrameLowering.h @@ -38,6 +38,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + 
DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -1195,6 +1195,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/ARM/Thumb1FrameLowering.h =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.h +++ lib/Target/ARM/Thumb1FrameLowering.h @@ -36,6 +36,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -408,6 +408,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/Hexagon/HexagonFrameLowering.h =================================================================== --- lib/Target/Hexagon/HexagonFrameLowering.h +++ lib/Target/Hexagon/HexagonFrameLowering.h @@ -33,6 +33,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; void Index: lib/Target/Hexagon/HexagonFrameLowering.cpp =================================================================== --- 
lib/Target/Hexagon/HexagonFrameLowering.cpp +++ lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -223,6 +223,7 @@ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo(); Index: lib/Target/MSP430/MSP430FrameLowering.h =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.h +++ lib/Target/MSP430/MSP430FrameLowering.h @@ -42,6 +42,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Index: lib/Target/MSP430/MSP430FrameLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430FrameLowering.cpp +++ lib/Target/MSP430/MSP430FrameLowering.cpp @@ -180,6 +180,7 @@ MSP430FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/Mips/Mips16FrameLowering.h =================================================================== --- lib/Target/Mips/Mips16FrameLowering.h +++ lib/Target/Mips/Mips16FrameLowering.h @@ -34,6 +34,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/Mips/Mips16FrameLowering.cpp =================================================================== --- lib/Target/Mips/Mips16FrameLowering.cpp +++ lib/Target/Mips/Mips16FrameLowering.cpp @@ -100,6 +100,7 @@ 
spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); MachineBasicBlock *EntryBlock = MF->begin(); Index: lib/Target/Mips/MipsSEFrameLowering.h =================================================================== --- lib/Target/Mips/MipsSEFrameLowering.h +++ lib/Target/Mips/MipsSEFrameLowering.h @@ -35,6 +35,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; Index: lib/Target/Mips/MipsSEFrameLowering.cpp =================================================================== --- lib/Target/Mips/MipsSEFrameLowering.cpp +++ lib/Target/Mips/MipsSEFrameLowering.cpp @@ -445,6 +445,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { MachineFunction *MF = MBB.getParent(); MachineBasicBlock *EntryBlock = MF->begin(); Index: lib/Target/PowerPC/PPCFrameLowering.h =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.h +++ lib/Target/PowerPC/PPCFrameLowering.h @@ -54,6 +54,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; void eliminateCallFramePseudoInstr(MachineFunction &MF, Index: lib/Target/PowerPC/PPCFrameLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCFrameLowering.cpp +++ lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1197,6 +1197,7 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap 
&CSRSlots, const TargetRegisterInfo *TRI) const { // Currently, this function only handles SVR4 32- and 64-bit ABIs. Index: lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.h +++ lib/Target/SystemZ/SystemZFrameLowering.h @@ -38,6 +38,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBII, Index: lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZFrameLowering.cpp +++ lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -124,6 +124,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -45,6 +45,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -328,20 +328,10 @@ // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. - // Determine maximum offset (minimum due to stack growth). 
- int64_t MaxOffset = 0; - for (std::vector::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; // Don't output a new machine move if we're re-saving the frame // pointer. This happens when the PrologEpilogInserter has inserted an extra @@ -977,6 +967,7 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return false; @@ -994,6 +985,9 @@ X86MachineFunctionInfo *X86FI = MF.getInfo(); const X86Subtarget &STI = MF.getTarget().getSubtarget(); + // Offset into the CSR area. + int CSRFrameOffset = 0; + // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? 
X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { @@ -1009,6 +1003,8 @@ CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); + CSRFrameOffset -= SlotSize; + CSRSlots[CSI[i-1].getFrameIdx()] = CSRFrameOffset; } X86FI->setCalleeSavedFrameSize(CalleeFrameSize); Index: lib/Target/XCore/XCoreFrameLowering.h =================================================================== --- lib/Target/XCore/XCoreFrameLowering.h +++ lib/Target/XCore/XCoreFrameLowering.h @@ -34,6 +34,7 @@ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const override; bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Index: lib/Target/XCore/XCoreFrameLowering.cpp =================================================================== --- lib/Target/XCore/XCoreFrameLowering.cpp +++ lib/Target/XCore/XCoreFrameLowering.cpp @@ -406,6 +406,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, + DenseMap &CSRSlots, const TargetRegisterInfo *TRI) const { if (CSI.empty()) return true; Index: test/CodeGen/X86/pr19905.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/pr19905.ll @@ -0,0 +1,79 @@ +; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s + +declare void @llvm.eh.unwind.init() + +define coldcc void @calls_unwind_init() { +; CHECK: pushq %rbp +; CHECK: .cfi_def_cfa_offset 16 +; CHECK: .cfi_offset %rbp, -16 +; CHECK: movq %rsp, %rbp +; CHECK: .cfi_def_cfa_register %rbp +; CHECK: pushq %r15 +; CHECK: pushq %r14 +; CHECK: pushq %r13 +; CHECK: pushq %r12 +; CHECK: pushq %r11 +; CHECK: pushq %r10 +; CHECK: pushq %r9 +; CHECK: pushq %r8 +; CHECK: pushq %rdi +; CHECK: pushq %rsi +; CHECK: pushq %rdx +; CHECK: pushq %rcx +; CHECK: pushq 
%rbx +; CHECK: subq $136, %rsp + +; These are supposed to be offsets from the CFA, which is the value of +; stack pointer just before the call. %rbx is the 14th register we +; explicitly push, including %rbp, so this offset is -8 (for the +; return address) + 14 * -8 = -120 + +; CHECK: .cfi_offset %rbx, -120 +; CHECK: .cfi_offset %rcx, -112 +; CHECK: .cfi_offset %rdx, -104 +; CHECK: .cfi_offset %rsi, -96 +; CHECK: .cfi_offset %rdi, -88 +; CHECK: .cfi_offset %r8, -80 +; CHECK: .cfi_offset %r9, -72 +; CHECK: .cfi_offset %r10, -64 +; CHECK: .cfi_offset %r11, -56 +; CHECK: .cfi_offset %r12, -48 +; CHECK: .cfi_offset %r13, -40 +; CHECK: .cfi_offset %r14, -32 +; CHECK: .cfi_offset %r15, -24 +; CHECK: .cfi_offset %xmm0, -144 +; CHECK: .cfi_offset %xmm1, -160 +; CHECK: .cfi_offset %xmm2, -176 +; CHECK: .cfi_offset %xmm3, -192 +; CHECK: .cfi_offset %xmm4, -208 +; CHECK: .cfi_offset %xmm5, -224 +; CHECK: .cfi_offset %xmm6, -240 +; CHECK: .cfi_offset %xmm7, -256 +; CHECK: .cfi_offset %xmm8, -272 +; CHECK: .cfi_offset %xmm9, -288 +; CHECK: .cfi_offset %xmm10, -304 +; CHECK: .cfi_offset %xmm11, -320 +; CHECK: .cfi_offset %xmm12, -336 +; CHECK: .cfi_offset %xmm13, -352 +; CHECK: .cfi_offset %xmm14, -368 +; CHECK: .cfi_offset %xmm15, -384 +; CHECK: movaps %xmm15, -368(%rbp) # 16-byte Spill +; CHECK: movaps %xmm14, -352(%rbp) # 16-byte Spill +; CHECK: movaps %xmm13, -336(%rbp) # 16-byte Spill +; CHECK: movaps %xmm12, -320(%rbp) # 16-byte Spill +; CHECK: movaps %xmm11, -304(%rbp) # 16-byte Spill +; CHECK: movaps %xmm10, -288(%rbp) # 16-byte Spill +; CHECK: movaps %xmm9, -272(%rbp) # 16-byte Spill +; CHECK: movaps %xmm8, -256(%rbp) # 16-byte Spill +; CHECK: movaps %xmm7, -240(%rbp) # 16-byte Spill +; CHECK: movaps %xmm6, -224(%rbp) # 16-byte Spill +; CHECK: movaps %xmm5, -208(%rbp) # 16-byte Spill +; CHECK: movaps %xmm4, -192(%rbp) # 16-byte Spill +; CHECK: movaps %xmm3, -176(%rbp) # 16-byte Spill +; CHECK: movaps %xmm2, -160(%rbp) # 16-byte Spill +; CHECK: movaps %xmm1, -144(%rbp) 
# 16-byte Spill +; CHECK: movaps %xmm0, -128(%rbp) # 16-byte Spill + + call void @llvm.eh.unwind.init() + ret void +}