Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.h +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.h @@ -24,8 +24,10 @@ // Override TargetFrameLowering. bool isFPCloseToIncomingSP() const override { return false; } - const SpillSlot *getCalleeSavedSpillSlots(unsigned &NumEntries) const - override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const override; void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override; bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, @@ -43,6 +45,8 @@ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override; bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + int getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const override; MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override; Index: llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -24,48 +24,114 @@ // The ABI-defined register save slots, relative to the CFA (i.e. // incoming stack pointer + SystemZMC::CallFrameSize). 
static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { - { SystemZ::R2D, -SystemZMC::CallFrameSize + 0x10 }, - { SystemZ::R3D, -SystemZMC::CallFrameSize + 0x18 }, - { SystemZ::R4D, -SystemZMC::CallFrameSize + 0x20 }, - { SystemZ::R5D, -SystemZMC::CallFrameSize + 0x28 }, - { SystemZ::R6D, -SystemZMC::CallFrameSize + 0x30 }, - { SystemZ::R7D, -SystemZMC::CallFrameSize + 0x38 }, - { SystemZ::R8D, -SystemZMC::CallFrameSize + 0x40 }, - { SystemZ::R9D, -SystemZMC::CallFrameSize + 0x48 }, - { SystemZ::R10D, -SystemZMC::CallFrameSize + 0x50 }, - { SystemZ::R11D, -SystemZMC::CallFrameSize + 0x58 }, - { SystemZ::R12D, -SystemZMC::CallFrameSize + 0x60 }, - { SystemZ::R13D, -SystemZMC::CallFrameSize + 0x68 }, - { SystemZ::R14D, -SystemZMC::CallFrameSize + 0x70 }, - { SystemZ::R15D, -SystemZMC::CallFrameSize + 0x78 }, - { SystemZ::F0D, -SystemZMC::CallFrameSize + 0x80 }, - { SystemZ::F2D, -SystemZMC::CallFrameSize + 0x88 }, - { SystemZ::F4D, -SystemZMC::CallFrameSize + 0x90 }, - { SystemZ::F6D, -SystemZMC::CallFrameSize + 0x98 } + { SystemZ::R2D, 0x10 }, + { SystemZ::R3D, 0x18 }, + { SystemZ::R4D, 0x20 }, + { SystemZ::R5D, 0x28 }, + { SystemZ::R6D, 0x30 }, + { SystemZ::R7D, 0x38 }, + { SystemZ::R8D, 0x40 }, + { SystemZ::R9D, 0x48 }, + { SystemZ::R10D, 0x50 }, + { SystemZ::R11D, 0x58 }, + { SystemZ::R12D, 0x60 }, + { SystemZ::R13D, 0x68 }, + { SystemZ::R14D, 0x70 }, + { SystemZ::R15D, 0x78 }, + { SystemZ::F0D, 0x80 }, + { SystemZ::F2D, 0x88 }, + { SystemZ::F4D, 0x90 }, + { SystemZ::F6D, 0x98 } }; } // end anonymous namespace SystemZFrameLowering::SystemZFrameLowering() : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(8), - -SystemZMC::CallFrameSize, Align(8), - false /* StackRealignable */) { + 0, Align(8), false /* StackRealignable */), + RegSpillOffsets(0) { // Due to the SystemZ ABI, the DWARF CFA (Canonical Frame Address) is not // equal to the incoming stack pointer, but to incoming stack pointer plus - // 160. 
The getOffsetOfLocalArea() returned value is interpreted as "the - // offset of the local area from the CFA". + // 160. Instead of using a Local Area Offset, the Register save area will + // be occupied by fixed frame objects, and all offsets are actually + // relative to CFA. // Create a mapping from register number to save slot offset. // These offsets are relative to the start of the register save area. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) - RegSpillOffsets[SpillOffsetTable[I].Reg] = - SystemZMC::CallFrameSize + SpillOffsetTable[I].Offset; + RegSpillOffsets[SpillOffsetTable[I].Reg] = SpillOffsetTable[I].Offset; } -const TargetFrameLowering::SpillSlot * -SystemZFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const { - NumEntries = array_lengthof(SpillOffsetTable); - return SpillOffsetTable; +bool SystemZFrameLowering:: +assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector &CSI) const { + SystemZMachineFunctionInfo *ZFI = MF.getInfo(); + MachineFrameInfo &MFFrame = MF.getFrameInfo(); + if (CSI.empty()) + return true; // Early exit if no callee saved registers are modified! + + unsigned LowGPR = 0; + unsigned HighGPR = SystemZ::R15D; + int StartSPOffset = SystemZMC::CallFrameSize; + int CurrOffset; + if (!ZFI->usePackedStack(MF)) { + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + int Offset = RegSpillOffsets[Reg]; + if (Offset) { + if (SystemZ::GR64BitRegClass.contains(Reg) && StartSPOffset > Offset) { + LowGPR = Reg; + StartSPOffset = Offset; + } + Offset -= SystemZMC::CallFrameSize; + int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + CurrOffset = -SystemZMC::CallFrameSize; + } else { + // Packed stack: put all the GPRs at the top of the Register save area. 
+ uint32_t LowGR64Num = UINT32_MAX; + for (auto &CS : CSI) { + unsigned Reg = CS.getReg(); + if (SystemZ::GR64BitRegClass.contains(Reg)) { + unsigned GR64Num = SystemZMC::getFirstReg(Reg); + int Offset = -8 * (15 - GR64Num + 1); + if (LowGR64Num > GR64Num) { + LowGR64Num = GR64Num; + StartSPOffset = SystemZMC::CallFrameSize + Offset; + } + int FrameIdx = MFFrame.CreateFixedSpillStackObject(8, Offset); + CS.setFrameIdx(FrameIdx); + } else + CS.setFrameIdx(INT32_MAX); + } + if (LowGR64Num < UINT32_MAX) + LowGPR = SystemZMC::GR64Regs[LowGR64Num]; + CurrOffset = LowGPR ? -(SystemZMC::CallFrameSize - StartSPOffset) : 0; + } + + // Save the range of call-saved registers, for use by the prologue/epilogue + // inserters. + ZFI->setLowSavedGPR(LowGPR); + ZFI->setHighSavedGPR(HighGPR); + ZFI->setLowGPROffset(StartSPOffset); + + // Create fixed stack objects for the remaining registers. + for (auto &CS : CSI) { + if (CS.getFrameIdx() != INT32_MAX) + continue; + unsigned Reg = CS.getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + unsigned Size = TRI->getSpillSize(*RC); + CurrOffset -= Size; + int FrameIdx = MFFrame.CreateFixedSpillStackObject(Size, CurrOffset); + CS.setFrameIdx(FrameIdx); + } + + return true; } void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF, @@ -115,6 +181,11 @@ break; } } + + if (!MFI->usePackedStack(MF)) + // Create the full incoming register save area regardless of how many CSRs + // will be saved. + MFI->getOrCreateFramePointerSaveIndex(MFFrame); } // Add GPR64 to the save instruction being built by MIB, which is in basic @@ -148,25 +219,9 @@ bool IsVarArg = MF.getFunction().isVarArg(); DebugLoc DL; - // Scan the call-saved GPRs and find the bounds of the register spill area. 
- unsigned LowGPR = 0; - unsigned HighGPR = SystemZ::R15D; - unsigned StartOffset = -1U; - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - unsigned Reg = CSI[I].getReg(); - if (SystemZ::GR64BitRegClass.contains(Reg)) { - unsigned Offset = RegSpillOffsets[Reg]; - assert(Offset && "Unexpected GPR save"); - if (StartOffset > Offset) { - LowGPR = Reg; - StartOffset = Offset; - } - } - } - - // Save the range of call-saved registers, for use by the epilogue inserter. - ZFI->setLowSavedGPR(LowGPR); - ZFI->setHighSavedGPR(HighGPR); + unsigned LowGPR = ZFI->getLowSavedGPR(); + unsigned HighGPR = ZFI->getHighSavedGPR(); + unsigned StartSPOffset = ZFI->getLowGPROffset(); // Include the GPR varargs, if any. R6D is call-saved, so would // be included by the loop above, but we also need to handle the @@ -176,8 +231,8 @@ if (FirstGPR < SystemZ::NumArgGPRs) { unsigned Reg = SystemZ::ArgGPRs[FirstGPR]; unsigned Offset = RegSpillOffsets[Reg]; - if (StartOffset > Offset) { - LowGPR = Reg; StartOffset = Offset; + if (StartSPOffset > Offset) { + LowGPR = Reg; StartSPOffset = Offset; } } } @@ -194,7 +249,7 @@ addSavedGPR(MBB, MIB, HighGPR, false); // Add the address. - MIB.addReg(SystemZ::R15D).addImm(StartOffset); + MIB.addReg(SystemZ::R15D).addImm(StartSPOffset); // Make sure all call-saved GPRs are included as operands and are // marked as live on entry. @@ -257,7 +312,7 @@ // this point might hold return values). unsigned LowGPR = ZFI->getLowSavedGPR(); unsigned HighGPR = ZFI->getHighSavedGPR(); - unsigned StartOffset = RegSpillOffsets[LowGPR]; + unsigned StartSPOffset = ZFI->getLowGPROffset(); if (LowGPR) { // If we saved any of %r2-%r5 as varargs, we should also be saving // and restoring %r6. If we're saving %r6 or above, we should be @@ -273,7 +328,7 @@ // Add the address. MIB.addReg(HasFP ? 
SystemZ::R11D : SystemZ::R15D); - MIB.addImm(StartOffset); + MIB.addImm(StartSPOffset); // Do a second scan adding regs as being defined by instruction for (unsigned I = 0, E = CSI.size(); I != E; ++I) { @@ -296,11 +351,10 @@ SystemZMC::CallFrameSize); // ... and the maximum offset we may need to reach into the // caller's frame to access the save area or stack arguments. - int64_t MaxArgOffset = SystemZMC::CallFrameSize; + int64_t MaxArgOffset = 0; for (int I = MFFrame.getObjectIndexBegin(); I != 0; ++I) if (MFFrame.getObjectOffset(I) >= 0) { - int64_t ArgOffset = SystemZMC::CallFrameSize + - MFFrame.getObjectOffset(I) + + int64_t ArgOffset = MFFrame.getObjectOffset(I) + MFFrame.getObjectSize(I); MaxArgOffset = std::max(MaxArgOffset, ArgOffset); } @@ -393,7 +447,8 @@ for (auto &Save : CSI) { unsigned Reg = Save.getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) { - int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg]; + int FI = Save.getFrameIdx(); + int64_t Offset = MFFrame.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION)) @@ -406,10 +461,19 @@ // We need to allocate the ABI-defined 160-byte base area whenever // we allocate stack space for our own use and whenever we call another // function. - if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls()) { + bool HasStackObject = false; + for (unsigned i = 0, e = MFFrame.getObjectIndexEnd(); i != e; ++i) + if (!MFFrame.isDeadObjectIndex(i)) { + HasStackObject = true; + break; + } + if (HasStackObject || MFFrame.hasCalls()) StackSize += SystemZMC::CallFrameSize; - MFFrame.setStackSize(StackSize); - } + // Don't allocate the incoming reg save area. + StackSize = StackSize > SystemZMC::CallFrameSize + ? StackSize - SystemZMC::CallFrameSize + : 0; + MFFrame.setStackSize(StackSize); if (StackSize) { // Determine if we want to store a backchain. 
@@ -434,7 +498,8 @@ if (StoreBackchain) BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG)) - .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0).addReg(0); + .addReg(SystemZ::R1D, RegState::Kill).addReg(SystemZ::R15D).addImm(0) + .addReg(0); } if (HasFP) { @@ -555,6 +620,16 @@ return true; } +int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg) const { + // Our incoming SP is actually SystemZMC::CallFrameSize below the CFA, so + // add that difference here. + int64_t Offset = + TargetFrameLowering::getFrameIndexReference(MF, FI, FrameReg); + return Offset + SystemZMC::CallFrameSize; +} + MachineBasicBlock::iterator SystemZFrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1410,7 +1410,7 @@ // ...and a similar frame index for the caller-allocated save area // that will be used to store the incoming registers. - int64_t RegSaveOffset = TFL->getOffsetOfLocalArea(); + int64_t RegSaveOffset = -SystemZMC::CallFrameSize; unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true); FuncInfo->setRegSaveFrameIndex(RegSaveIndex); @@ -3084,14 +3084,9 @@ unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); EVT PtrVT = getPointerTy(DAG.getDataLayout()); - // If the back chain frame index has not been allocated yet, do so. + // By definition, the frame address is the address of the back chain. SystemZMachineFunctionInfo *FI = MF.getInfo(); - int BackChainIdx = FI->getFramePointerSaveIndex(); - if (!BackChainIdx) { - // By definition, the frame address is the address of the back chain. 
- BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); - FI->setFramePointerSaveIndex(BackChainIdx); - } + int BackChainIdx = FI->getOrCreateFramePointerSaveIndex(MFI); SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); // FIXME The frontend should detect this case. Index: llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h =================================================================== --- llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h @@ -9,7 +9,10 @@ #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINEFUNCTIONINFO_H +#include "MCTargetDesc/SystemZMCTargetDesc.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Function.h" namespace llvm { @@ -24,12 +27,13 @@ int FramePointerSaveIndex; bool ManipulatesSP; unsigned NumLocalDynamics; + unsigned LowGPROffset; public: explicit SystemZMachineFunctionInfo(MachineFunction &MF) : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0), - ManipulatesSP(false), NumLocalDynamics(0) {} + ManipulatesSP(false), NumLocalDynamics(0), LowGPROffset(0) {} // Get and set the first call-saved GPR that should be saved and restored // by this function. This is 0 if no GPRs need to be saved or restored. @@ -59,9 +63,13 @@ unsigned getRegSaveFrameIndex() const { return RegSaveFrameIndex; } void setRegSaveFrameIndex(unsigned FI) { RegSaveFrameIndex = FI; } - // Get and set the frame index of where the old frame pointer is stored. - int getFramePointerSaveIndex() const { return FramePointerSaveIndex; } - void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; } + // Get or create the frame index of where the old frame pointer is stored. 
+ int getOrCreateFramePointerSaveIndex(MachineFrameInfo &MFI) { + if (!FramePointerSaveIndex) + FramePointerSaveIndex = + MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false); + return FramePointerSaveIndex; + } // Get and set whether the function directly manipulates the stack pointer, // e.g. through STACKSAVE or STACKRESTORE. @@ -71,6 +79,19 @@ // Count number of local-dynamic TLS symbols used. unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + + bool usePackedStack(MachineFunction &MF) { + bool HasPackedStackAttr = MF.getFunction().hasFnAttribute("packed-stack"); + bool IsVarArg = MF.getFunction().isVarArg(); + bool CallConv = MF.getFunction().getCallingConv() != CallingConv::GHC; + bool BackChain = MF.getFunction().hasFnAttribute("backchain"); + bool FramAddressTaken = MF.getFrameInfo().isFrameAddressTaken(); + return HasPackedStackAttr && !IsVarArg && CallConv && !BackChain && + !FramAddressTaken; + } + + void setLowGPROffset(unsigned Offs) { LowGPROffset = Offs; } + unsigned getLowGPROffset() const { return LowGPROffset; } }; } // end namespace llvm Index: llvm/test/CodeGen/SystemZ/anyregcc-novec.ll =================================================================== --- llvm/test/CodeGen/SystemZ/anyregcc-novec.ll +++ llvm/test/CodeGen/SystemZ/anyregcc-novec.ll @@ -5,14 +5,14 @@ entry: ;CHECK-LABEL: anyregcc1 ;CHECK: stmg %r2, %r15, 16(%r15) -;CHECK: aghi %r15, -256 -;CHECK: std %f0, 384(%r15) +;CHECK: aghi %r15, -96 +;CHECK: std %f0, 224(%r15) ;CHECK: std %f1, -;CHECK: std %f2, 392(%r15) +;CHECK: std %f2, 232(%r15) ;CHECK: std %f3, -;CHECK: std %f4, 400(%r15) +;CHECK: std %f4, 240(%r15) ;CHECK: std %f5, -;CHECK: std %f6, 408(%r15) +;CHECK: std %f6, 248(%r15) ;CHECK: std %f7, ;CHECK: std %f8, ;CHECK: std %f9, @@ -26,10 +26,10 @@ ;CHECK: .cfi_offset %f2, -24 ;CHECK: .cfi_offset %f4, -16 ;CHECK: .cfi_offset %f6, -8 -;CHECK: ld %f0, 384(%r15) -;CHECK: ld %f2, 392(%r15) 
-;CHECK: ld %f4, 400(%r15) -;CHECK: ld %f6, 408(%r15) +;CHECK: ld %f0, 224(%r15) +;CHECK: ld %f2, 232(%r15) +;CHECK: ld %f4, 240(%r15) +;CHECK: ld %f6, 248(%r15) call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{f0},~{f1},~{f2},~{f3},~{f4},~{f5},~{f6},~{f7},~{f8},~{f9},~{f10},~{f11},~{f12},~{f13},~{f14},~{f15}"() nounwind ret void } Index: llvm/test/CodeGen/SystemZ/frame-02.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-02.ll +++ llvm/test/CodeGen/SystemZ/frame-02.ll @@ -4,20 +4,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. define void @f1(float *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -27,15 +27,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... 
-; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr @@ -92,15 +92,15 @@ ; so %f15 is the one that gets dropped. define void @f2(float *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -110,14 +110,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %f15 ; ...main function body... 
-; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr @@ -170,9 +170,9 @@ ; Like f1, but should require only one call-saved FPR. define void @f3(float *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: %f9 ; CHECK-NOT: %f10 @@ -182,8 +182,8 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile float, float *%ptr %l1 = load volatile float, float *%ptr Index: llvm/test/CodeGen/SystemZ/frame-03.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-03.ll +++ llvm/test/CodeGen/SystemZ/frame-03.ll @@ -6,20 +6,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. 
define void @f1(double *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -29,15 +29,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... -; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -94,15 +94,15 @@ ; so %f15 is the one that gets dropped. 
define void @f2(double *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -112,14 +112,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -172,9 +172,9 @@ ; Like f1, but should require only one call-saved FPR. define void @f3(double *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: %f9 ; CHECK-NOT: %f10 @@ -184,8 +184,8 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... 
-; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr Index: llvm/test/CodeGen/SystemZ/frame-04.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-04.ll +++ llvm/test/CodeGen/SystemZ/frame-04.ll @@ -5,20 +5,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. define void @f1(fp128 *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -28,15 +28,15 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK: .cfi_offset %f15, -224 ; ...main function body... 
-; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr @@ -69,14 +69,14 @@ ; so %f13+%f15 is the pair that gets dropped. define void @f2(fp128 *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -208 -; CHECK: .cfi_def_cfa_offset 368 -; CHECK: std %f8, 200(%r15) -; CHECK: std %f9, 192(%r15) -; CHECK: std %f10, 184(%r15) -; CHECK: std %f11, 176(%r15) -; CHECK: std %f12, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -48 +; CHECK: .cfi_def_cfa_offset 208 +; CHECK: std %f8, 40(%r15) +; CHECK: std %f9, 32(%r15) +; CHECK: std %f10, 24(%r15) +; CHECK: std %f11, 16(%r15) +; CHECK: std %f12, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -86,13 +86,13 @@ ; CHECK-NOT: %f13 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 200(%r15) -; CHECK: ld %f9, 192(%r15) -; CHECK: ld %f10, 184(%r15) -; CHECK: ld %f11, 176(%r15) -; CHECK: ld %f12, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 208 +; CHECK: ld %f8, 40(%r15) +; CHECK: ld %f9, 32(%r15) +; CHECK: ld %f10, 24(%r15) +; CHECK: ld %f11, 16(%r15) +; CHECK: ld %f12, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 48 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr @@ -122,10 +122,10 @@ ; numerical order so the pair should be %f8+%f10. 
define void @f3(fp128 *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -176 -; CHECK: .cfi_def_cfa_offset 336 -; CHECK: std %f8, 168(%r15) -; CHECK: std %f10, 160(%r15) +; CHECK: aghi %r15, -16 +; CHECK: .cfi_def_cfa_offset 176 +; CHECK: std %f8, 8(%r15) +; CHECK: std %f10, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f10, -176 ; CHECK-NOT: %f9 @@ -135,9 +135,9 @@ ; CHECK-NOT: %f14 ; CHECK-NOT: %f15 ; ...main function body... -; CHECK: ld %f8, 168(%r15) -; CHECK: ld %f10, 160(%r15) -; CHECK: aghi %r15, 176 +; CHECK: ld %f8, 8(%r15) +; CHECK: ld %f10, 0(%r15) +; CHECK: aghi %r15, 16 ; CHECK: br %r14 %l0 = load volatile fp128, fp128 *%ptr %l1 = load volatile fp128, fp128 *%ptr Index: llvm/test/CodeGen/SystemZ/frame-19.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-19.ll +++ llvm/test/CodeGen/SystemZ/frame-19.ll @@ -96,10 +96,11 @@ ret void } -; Like f1, but no 16-byte slot should be needed. +; Like f1, but no 16-byte slot should be needed, and no outgoing reg save +; area of 160 bytes. define void @f2(<16 x i8> *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -224 +; CHECK: aghi %r15, -64 ; CHECK-DAG: std %f8, ; CHECK-DAG: std %f9, ; CHECK-DAG: std %f10, @@ -118,7 +119,7 @@ ; CHECK-DAG: ld %f13, ; CHECK-DAG: ld %f14, ; CHECK-DAG: ld %f15, -; CHECK: aghi %r15, 224 +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %v0 = load volatile <16 x i8>, <16 x i8> *%ptr %v1 = load volatile <16 x i8>, <16 x i8> *%ptr @@ -190,7 +191,7 @@ ; Like f2, but only %f8 should be saved. 
define void @f3(<16 x i8> *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 +; CHECK: aghi %r15, -8 ; CHECK-DAG: std %f8, ; CHECK-NOT: vst {{.*}}(%r15) ; CHECK-NOT: vl {{.*}}(%r15) @@ -202,7 +203,7 @@ ; CHECK-NOT: %v14 ; CHECK-NOT: %v15 ; CHECK-DAG: ld %f8, -; CHECK: aghi %r15, 168 +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %v0 = load volatile <16 x i8>, <16 x i8> *%ptr %v1 = load volatile <16 x i8>, <16 x i8> *%ptr Index: llvm/test/CodeGen/SystemZ/frame-20.ll =================================================================== --- llvm/test/CodeGen/SystemZ/frame-20.ll +++ llvm/test/CodeGen/SystemZ/frame-20.ll @@ -5,20 +5,20 @@ ; This function should require all FPRs, but no other spill slots. ; We need to save and restore 8 of the 16 FPRs, so the frame size -; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -; (the caller-allocated part of the frame) + 224. +; should be exactly 8 * 8 = 64. The CFA offset is 160 +; (the caller-allocated part of the frame) + 64. define void @f1(double *%ptr) { ; CHECK-LABEL: f1: -; CHECK: aghi %r15, -224 -; CHECK: .cfi_def_cfa_offset 384 -; CHECK: std %f8, 216(%r15) -; CHECK: std %f9, 208(%r15) -; CHECK: std %f10, 200(%r15) -; CHECK: std %f11, 192(%r15) -; CHECK: std %f12, 184(%r15) -; CHECK: std %f13, 176(%r15) -; CHECK: std %f14, 168(%r15) -; CHECK: std %f15, 160(%r15) +; CHECK: aghi %r15, -64 +; CHECK: .cfi_def_cfa_offset 224 +; CHECK: std %f8, 56(%r15) +; CHECK: std %f9, 48(%r15) +; CHECK: std %f10, 40(%r15) +; CHECK: std %f11, 32(%r15) +; CHECK: std %f12, 24(%r15) +; CHECK: std %f13, 16(%r15) +; CHECK: std %f14, 8(%r15) +; CHECK: std %f15, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -35,15 +35,15 @@ ; CHECK-DAG: vlrepg %v23, 0(%r2) ; CHECK-DAG: vlrepg %v24, 0(%r2) ; CHECK-DAG: vlrepg %v31, 0(%r2) -; CHECK: ld %f8, 216(%r15) -; CHECK: ld %f9, 208(%r15) -; CHECK: ld %f10, 200(%r15) -; CHECK: ld %f11, 192(%r15) -; CHECK: ld %f12, 184(%r15) -; CHECK: ld %f13, 
176(%r15) -; CHECK: ld %f14, 168(%r15) -; CHECK: ld %f15, 160(%r15) -; CHECK: aghi %r15, 224 +; CHECK: ld %f8, 56(%r15) +; CHECK: ld %f9, 48(%r15) +; CHECK: ld %f10, 40(%r15) +; CHECK: ld %f11, 32(%r15) +; CHECK: ld %f12, 24(%r15) +; CHECK: ld %f13, 16(%r15) +; CHECK: ld %f14, 8(%r15) +; CHECK: ld %f15, 0(%r15) +; CHECK: aghi %r15, 64 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -148,15 +148,15 @@ ; so %f15 is the one that gets dropped. define void @f2(double *%ptr) { ; CHECK-LABEL: f2: -; CHECK: aghi %r15, -216 -; CHECK: .cfi_def_cfa_offset 376 -; CHECK: std %f8, 208(%r15) -; CHECK: std %f9, 200(%r15) -; CHECK: std %f10, 192(%r15) -; CHECK: std %f11, 184(%r15) -; CHECK: std %f12, 176(%r15) -; CHECK: std %f13, 168(%r15) -; CHECK: std %f14, 160(%r15) +; CHECK: aghi %r15, -56 +; CHECK: .cfi_def_cfa_offset 216 +; CHECK: std %f8, 48(%r15) +; CHECK: std %f9, 40(%r15) +; CHECK: std %f10, 32(%r15) +; CHECK: std %f11, 24(%r15) +; CHECK: std %f12, 16(%r15) +; CHECK: std %f13, 8(%r15) +; CHECK: std %f14, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK: .cfi_offset %f9, -176 ; CHECK: .cfi_offset %f10, -184 @@ -166,14 +166,14 @@ ; CHECK: .cfi_offset %f14, -216 ; CHECK-NOT: %v15 ; CHECK-NOT: %f15 -; CHECK: ld %f8, 208(%r15) -; CHECK: ld %f9, 200(%r15) -; CHECK: ld %f10, 192(%r15) -; CHECK: ld %f11, 184(%r15) -; CHECK: ld %f12, 176(%r15) -; CHECK: ld %f13, 168(%r15) -; CHECK: ld %f14, 160(%r15) -; CHECK: aghi %r15, 216 +; CHECK: ld %f8, 48(%r15) +; CHECK: ld %f9, 40(%r15) +; CHECK: ld %f10, 32(%r15) +; CHECK: ld %f11, 24(%r15) +; CHECK: ld %f12, 16(%r15) +; CHECK: ld %f13, 8(%r15) +; CHECK: ld %f14, 0(%r15) +; CHECK: aghi %r15, 56 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr @@ -274,14 +274,14 @@ ; Like f1, but should require only one call-saved FPR. 
define void @f3(double *%ptr) { ; CHECK-LABEL: f3: -; CHECK: aghi %r15, -168 -; CHECK: .cfi_def_cfa_offset 328 -; CHECK: std %f8, 160(%r15) +; CHECK: aghi %r15, -8 +; CHECK: .cfi_def_cfa_offset 168 +; CHECK: std %f8, 0(%r15) ; CHECK: .cfi_offset %f8, -168 ; CHECK-NOT: {{%[fv]9}} ; CHECK-NOT: {{%[fv]1[0-5]}} -; CHECK: ld %f8, 160(%r15) -; CHECK: aghi %r15, 168 +; CHECK: ld %f8, 0(%r15) +; CHECK: aghi %r15, 8 ; CHECK: br %r14 %l0 = load volatile double, double *%ptr %l1 = load volatile double, double *%ptr Index: llvm/test/CodeGen/SystemZ/frame-22.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/SystemZ/frame-22.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +; +; Test the packed stack layout, which every function in this file requests through attribute group #0 ("packed-stack"="true"). + +; Test spill/restore of an FPR and a GPR: %f8 is expected at 120(%r15), directly below the %r12-%r15 block stored at 128(%r15), and the expected sequence from the stmg to the return contains no stack pointer adjustment. +define void @f1() #0 { +; CHECK-LABEL: f1: +; CHECK: stmg %r12, %r15, 128(%r15) +; CHECK-NEXT: .cfi_offset %r12, -32 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: std %f8, 120(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -40 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 120(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r12, %r15, 128(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8},~{r12}"() nounwind ; empty asm: clobbering %f8 and %r12 is what makes the function save and restore them + ret void +} + +; Test spill/restore with anyregcc, including an FP argument register: %f0 and %f1 are expected at 48(%r15) and 40(%r15), directly below the %r3-%r15 block stored at 56(%r15).
+define anyregcc void @f2() #0 { +; CHECK-LABEL: f2: +; CHECK: stmg %r3, %r15, 56(%r15) +; CHECK-NEXT: .cfi_offset %r3, -104 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: std %f0, 48(%r15) # 8-byte Folded Spill +; CHECK-NEXT: std %f1, 40(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f0, -112 +; CHECK-NEXT: .cfi_offset %f1, -120 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f0, 48(%r15) # 8-byte Folded Reload +; CHECK-NEXT: ld %f1, 40(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r3, %r15, 56(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f0},~{f1},~{r3}"() nounwind ; empty asm: clobbering %f0, %f1 and %r3 is what makes the function save and restore them + ret void +} + +; Test spill/restore in local area with incoming stack arguments: %f8 is expected at 152(%r15), just below the incoming arguments that the expected code reads from 160(%r15) and 168(%r15), and no stack pointer adjustment appears in the expected sequence. +define i64 @f3(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, + double %A, double %B, double %C, double %D, double %E) #0 { +; CHECK-LABEL: f3: +; CHECK: std %f8, 152(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -8 +; CHECK-NEXT: ld %f0, 168(%r15) +; CHECK-NEXT: cgdbr %r2, 5, %f0 +; CHECK-NEXT: ag %r2, 160(%r15) +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ld %f8, 152(%r15) # 8-byte Folded Reload +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind ; empty asm: clobbering %f8 is what makes the function save and restore it + %Ei = fptosi double %E to i64 + %S = add i64 %f, %Ei + ret i64 %S +} + +; Test spill/restore in local area with outgoing stack arguments: after the expected 104-byte stack adjustment, %f8 is spilled at 176(%r15), above the outgoing arguments stored at 160(%r15) and 168(%r15).
+define i64 @f4() #0 { +; CHECK-LABEL: f4: +; CHECK: stmg %r6, %r15, 80(%r15) +; CHECK-NEXT: .cfi_offset %r6, -80 +; CHECK-NEXT: .cfi_offset %r14, -16 +; CHECK-NEXT: .cfi_offset %r15, -8 +; CHECK-NEXT: aghi %r15, -104 +; CHECK-NEXT: .cfi_def_cfa_offset 264 +; CHECK-NEXT: std %f8, 176(%r15) # 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset %f8, -88 +; CHECK-NEXT: #APP +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: llihh %r0, 16404 +; CHECK-NEXT: stg %r0, 168(%r15) +; CHECK: mvghi 160(%r15), 6 +; CHECK-NEXT: brasl %r14, f3@PLT +; CHECK-NEXT: ld %f8, 176(%r15) # 8-byte Folded Reload +; CHECK-NEXT: lmg %r6, %r15, 184(%r15) +; CHECK-NEXT: br %r14 + call void asm sideeffect "", "~{f8}"() nounwind ; empty asm: clobbering %f8 is what makes the function save and restore it + %C = call i64 @f3 (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, + double 1.0, double 2.0, double 3.0, double 4.0, double 5.0) ; the last i64 (6) and the last double (5.0, high halfword 16404 = 0x4014) are the values the expected code stores to 160(%r15) and 168(%r15) + ret i64 %C +} + +attributes #0 = { "packed-stack"="true" } ; attribute group used by f1, f2, f3 and f4